mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
Compare commits
8 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a33a0670f6 | ||
|
|
14c9ff46d1 | ||
|
|
1865f7decf | ||
|
|
a608621476 | ||
|
|
00514999ff | ||
|
|
b3b597fef6 | ||
|
|
bf17144591 | ||
|
|
09e110525f |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.16.1-beta.1"
|
||||
current_version = "0.16.1-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
463
Cargo.lock
generated
463
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
20
Cargo.toml
20
Cargo.toml
@@ -23,14 +23,14 @@ rust-version = "1.78.0"
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.23.1", "features" = [
|
||||
"dynamodb",
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.23.1-beta.2"}
|
||||
lance-io = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-index = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-linalg = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-table = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-testing = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-datafusion = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-encoding = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.23.1-beta.4"}
|
||||
lance-io = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-index = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-linalg = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-table = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-testing = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-datafusion = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
|
||||
lance-encoding = {version = "=0.23.1", tag="v0.23.1-beta.4", git = "https://github.com/lancedb/lance.git"}
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "53.2", optional = false }
|
||||
arrow-array = "53.2"
|
||||
@@ -55,9 +55,9 @@ half = { "version" = "=2.4.1", default-features = false, features = [
|
||||
futures = "0"
|
||||
log = "0.4"
|
||||
moka = { version = "0.12", features = ["future"] }
|
||||
object_store = "0.10.2"
|
||||
object_store = "0.11.0"
|
||||
pin-project = "1.0.7"
|
||||
snafu = "0.7.4"
|
||||
snafu = "0.8"
|
||||
url = "2"
|
||||
num-traits = "0.2"
|
||||
rand = "0.8"
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.16.1-beta.1</version>
|
||||
<version>0.16.1-beta.2</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.16.1-beta.1</version>
|
||||
<version>0.16.1-beta.2</version>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<name>LanceDB Parent</name>
|
||||
|
||||
124
node/package-lock.json
generated
124
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -52,14 +52,14 @@
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-darwin-x64": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.1"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-darwin-x64": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@apache-arrow/ts": "^14.0.2",
|
||||
@@ -329,110 +329,6 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.16.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.16.1-beta.1.tgz",
|
||||
"integrity": "sha512-dV/I7K9a8OZUk6JaY1mOFRCtq0+ae2erRaDe6AQVeA6t+IVClQaYkHiYUQa5wHqN0QTb2GU7qUCRgA8ZLilVuw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.16.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.16.1-beta.1.tgz",
|
||||
"integrity": "sha512-Fyn9zTDUrYK/bjriHSu/0TiIA7NJmjQQxqzjcwFOMsghL8D0FQuxqHYWKoJSaJv6me6Kd41R4qesC9mrt2ZGDw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.16.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.16.1-beta.1.tgz",
|
||||
"integrity": "sha512-2vcSfg6pVzEkA7tqQssBQgnf7Rbw5fp1xdhnd0X5h4z6Xqs6CpynbkfsyMjBRuL7bDv4phl1lvJmlb3Zfre7mg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-musl": {
|
||||
"version": "0.16.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.16.1-beta.1.tgz",
|
||||
"integrity": "sha512-rxE/G2PRKTU+fi5j1ojBV9dKfVLKGLnKgLezGryPkKwMYokCQ8kGxohKTmrmLaclXZpBDQo4AYIIE0oJhXG04g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.16.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.16.1-beta.1.tgz",
|
||||
"integrity": "sha512-XWqRMif+wVTfcAPQI9n/H6eNmfwPHWBo4e0SAdWw+eifaBDxrcSTDhPpIrW+NGTJMhlm01rD7Qzh4j0y/Vcy5w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-musl": {
|
||||
"version": "0.16.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.16.1-beta.1.tgz",
|
||||
"integrity": "sha512-W/OV5aXAbqhQQ8NYLJG5W/ZdHwpfjiS/9VTVVo44cOznCO6tijiSsrLYIyIw/rHU+ORhYi7My+5+JmkrIoQIgQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-arm64-msvc": {
|
||||
"version": "0.16.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-arm64-msvc/-/vectordb-win32-arm64-msvc-0.16.1-beta.1.tgz",
|
||||
"integrity": "sha512-jy18KmJUem+lInjODNfPLSXM8Dmb9xydcab/IoSPB+6qhEn/QC0HW3R1KkR3I6cfryYkEXsNd35pNdtajn9DsQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.16.1-beta.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.16.1-beta.1.tgz",
|
||||
"integrity": "sha512-ObGYHAkvyOxJA3QTxMukXaHCe29J2Pbsm90w2c+E3zGb8K5Xe4pLTqojGEDYrtNKZhf2d9YMsoQq8dg6mLsbag==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@neon-rs/cli": {
|
||||
"version": "0.0.160",
|
||||
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"private": false,
|
||||
"main": "dist/index.js",
|
||||
@@ -92,13 +92,13 @@
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-x64": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.1",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.1"
|
||||
"@lancedb/vectordb-darwin-x64": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.2",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.2"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.16.1-beta.1"
|
||||
version = "0.16.1-beta.2"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.16.1-beta.1",
|
||||
"version": "0.16.1-beta.2",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.19.1-beta.2"
|
||||
current_version = "0.19.1-beta.3"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.19.1-beta.2"
|
||||
version = "0.19.1-beta.3"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -199,18 +199,29 @@ else:
|
||||
]
|
||||
|
||||
|
||||
def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
|
||||
if inspect.isclass(tp):
|
||||
if issubclass(tp, pydantic.BaseModel):
|
||||
# Struct
|
||||
fields = _pydantic_model_to_fields(tp)
|
||||
return pa.struct(fields)
|
||||
if issubclass(tp, FixedSizeListMixin):
|
||||
return pa.list_(tp.value_arrow_type(), tp.dim())
|
||||
return _py_type_to_arrow_type(tp, field)
|
||||
|
||||
|
||||
def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
|
||||
"""Convert a Pydantic FieldInfo to Arrow DataType"""
|
||||
|
||||
if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
|
||||
origin = field.annotation.__origin__
|
||||
args = field.annotation.__args__
|
||||
|
||||
if origin is list:
|
||||
child = args[0]
|
||||
return pa.list_(_py_type_to_arrow_type(child, field))
|
||||
elif origin == Union:
|
||||
if len(args) == 2 and args[1] is type(None):
|
||||
return _py_type_to_arrow_type(args[0], field)
|
||||
return _pydantic_type_to_arrow_type(args[0], field)
|
||||
elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
|
||||
args = field.annotation.__args__
|
||||
if len(args) == 2:
|
||||
@@ -218,14 +229,7 @@ def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
|
||||
if typ is type(None):
|
||||
continue
|
||||
return _py_type_to_arrow_type(typ, field)
|
||||
elif inspect.isclass(field.annotation):
|
||||
if issubclass(field.annotation, pydantic.BaseModel):
|
||||
# Struct
|
||||
fields = _pydantic_model_to_fields(field.annotation)
|
||||
return pa.struct(fields)
|
||||
elif issubclass(field.annotation, FixedSizeListMixin):
|
||||
return pa.list_(field.annotation.value_arrow_type(), field.annotation.dim())
|
||||
return _py_type_to_arrow_type(field.annotation, field)
|
||||
return _pydantic_type_to_arrow_type(field.annotation, field)
|
||||
|
||||
|
||||
def is_nullable(field: FieldInfo) -> bool:
|
||||
|
||||
@@ -10,6 +10,7 @@ import pyarrow as pa
|
||||
import pydantic
|
||||
import pytest
|
||||
from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, Vector, pydantic_to_schema
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
|
||||
|
||||
@@ -252,3 +253,104 @@ def test_lance_model():
|
||||
|
||||
t = TestModel()
|
||||
assert t == TestModel(vec=[0.0] * 16, li=[1, 2, 3])
|
||||
|
||||
|
||||
def test_optional_nested_model():
|
||||
class WAMedia(BaseModel):
|
||||
url: str
|
||||
mimetype: str
|
||||
filename: Optional[str]
|
||||
error: Optional[str]
|
||||
data: bytes
|
||||
|
||||
class WALocation(BaseModel):
|
||||
description: Optional[str]
|
||||
latitude: str
|
||||
longitude: str
|
||||
|
||||
class ReplyToMessage(BaseModel):
|
||||
id: str
|
||||
participant: str
|
||||
body: str
|
||||
|
||||
class Message(BaseModel):
|
||||
id: str
|
||||
timestamp: int
|
||||
from_: str
|
||||
fromMe: bool
|
||||
to: str
|
||||
body: str
|
||||
hasMedia: Optional[bool]
|
||||
media: WAMedia
|
||||
mediaUrl: Optional[str]
|
||||
ack: Optional[int]
|
||||
ackName: Optional[str]
|
||||
author: Optional[str]
|
||||
location: Optional[WALocation]
|
||||
vCards: Optional[List[str]]
|
||||
replyTo: Optional[ReplyToMessage]
|
||||
|
||||
class AnyEvent(LanceModel):
|
||||
id: str
|
||||
session: str
|
||||
metadata: Optional[str] = None
|
||||
engine: str
|
||||
event: str
|
||||
|
||||
class MessageEvent(AnyEvent):
|
||||
payload: Message
|
||||
|
||||
schema = pydantic_to_schema(MessageEvent)
|
||||
|
||||
payload = schema.field("payload")
|
||||
assert payload.type == pa.struct(
|
||||
[
|
||||
pa.field("id", pa.utf8(), False),
|
||||
pa.field("timestamp", pa.int64(), False),
|
||||
pa.field("from_", pa.utf8(), False),
|
||||
pa.field("fromMe", pa.bool_(), False),
|
||||
pa.field("to", pa.utf8(), False),
|
||||
pa.field("body", pa.utf8(), False),
|
||||
pa.field("hasMedia", pa.bool_(), True),
|
||||
pa.field(
|
||||
"media",
|
||||
pa.struct(
|
||||
[
|
||||
pa.field("url", pa.utf8(), False),
|
||||
pa.field("mimetype", pa.utf8(), False),
|
||||
pa.field("filename", pa.utf8(), True),
|
||||
pa.field("error", pa.utf8(), True),
|
||||
pa.field("data", pa.binary(), False),
|
||||
]
|
||||
),
|
||||
False,
|
||||
),
|
||||
pa.field("mediaUrl", pa.utf8(), True),
|
||||
pa.field("ack", pa.int64(), True),
|
||||
pa.field("ackName", pa.utf8(), True),
|
||||
pa.field("author", pa.utf8(), True),
|
||||
pa.field(
|
||||
"location",
|
||||
pa.struct(
|
||||
[
|
||||
pa.field("description", pa.utf8(), True),
|
||||
pa.field("latitude", pa.utf8(), False),
|
||||
pa.field("longitude", pa.utf8(), False),
|
||||
]
|
||||
),
|
||||
True, # Optional
|
||||
),
|
||||
pa.field("vCards", pa.list_(pa.utf8()), True),
|
||||
pa.field(
|
||||
"replyTo",
|
||||
pa.struct(
|
||||
[
|
||||
pa.field("id", pa.utf8(), False),
|
||||
pa.field("participant", pa.utf8(), False),
|
||||
pa.field("body", pa.utf8(), False),
|
||||
]
|
||||
),
|
||||
True,
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -232,6 +232,71 @@ async def test_distance_range_async(table_async: AsyncTable):
|
||||
assert res["_distance"].to_pylist() == [min_dist, max_dist]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_distance_range_with_new_rows_async():
|
||||
conn = await lancedb.connect_async(
|
||||
"memory://", read_consistency_interval=timedelta(seconds=0)
|
||||
)
|
||||
data = pa.table(
|
||||
{
|
||||
"vector": pa.FixedShapeTensorArray.from_numpy_ndarray(
|
||||
np.random.rand(256, 2)
|
||||
),
|
||||
}
|
||||
)
|
||||
table = await conn.create_table("test", data)
|
||||
table.create_index("vector", config=IvfPq(num_partitions=1, num_sub_vectors=2))
|
||||
|
||||
q = [0, 0]
|
||||
rs = await table.query().nearest_to(q).to_arrow()
|
||||
dists = rs["_distance"].to_pylist()
|
||||
min_dist = dists[0]
|
||||
max_dist = dists[-1]
|
||||
|
||||
# append more rows so that execution plan would be mixed with ANN & Flat KNN
|
||||
new_data = pa.table(
|
||||
{
|
||||
"vector": pa.FixedShapeTensorArray.from_numpy_ndarray(np.random.rand(4, 2)),
|
||||
}
|
||||
)
|
||||
await table.add(new_data)
|
||||
|
||||
res = (
|
||||
await table.query()
|
||||
.nearest_to(q)
|
||||
.distance_range(upper_bound=min_dist)
|
||||
.to_arrow()
|
||||
)
|
||||
assert len(res) == 0
|
||||
|
||||
res = (
|
||||
await table.query()
|
||||
.nearest_to(q)
|
||||
.distance_range(lower_bound=max_dist)
|
||||
.to_arrow()
|
||||
)
|
||||
for dist in res["_distance"].to_pylist():
|
||||
assert dist >= max_dist
|
||||
|
||||
res = (
|
||||
await table.query()
|
||||
.nearest_to(q)
|
||||
.distance_range(upper_bound=max_dist)
|
||||
.to_arrow()
|
||||
)
|
||||
for dist in res["_distance"].to_pylist():
|
||||
assert dist < max_dist
|
||||
|
||||
res = (
|
||||
await table.query()
|
||||
.nearest_to(q)
|
||||
.distance_range(lower_bound=min_dist)
|
||||
.to_arrow()
|
||||
)
|
||||
for dist in res["_distance"].to_pylist():
|
||||
assert dist >= min_dist
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"multivec_table", [pa.float16(), pa.float32(), pa.float64()], indirect=True
|
||||
)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-node"
|
||||
version = "0.16.1-beta.1"
|
||||
version = "0.16.1-beta.2"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
edition.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.16.1-beta.1"
|
||||
version = "0.16.1-beta.2"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -10,7 +10,7 @@ use crate::index::IndexStatistics;
|
||||
use crate::query::{QueryRequest, Select, VectorQueryRequest};
|
||||
use crate::table::{AddDataMode, AnyQuery, Filter};
|
||||
use crate::utils::{supported_btree_data_type, supported_vector_data_type};
|
||||
use crate::{DistanceType, Error, Table};
|
||||
use crate::{DistanceType, Error};
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_ipc::reader::FileReader;
|
||||
use arrow_schema::{DataType, SchemaRef};
|
||||
@@ -24,7 +24,7 @@ use http::StatusCode;
|
||||
use lance::arrow::json::{JsonDataType, JsonSchema};
|
||||
use lance::dataset::scanner::DatasetRecordBatchStream;
|
||||
use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
|
||||
use lance_datafusion::exec::{execute_plan, OneShotExec};
|
||||
use lance_datafusion::exec::OneShotExec;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
@@ -205,10 +205,10 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
}
|
||||
|
||||
fn apply_vector_query_params(
|
||||
mut body: serde_json::Value,
|
||||
body: &mut serde_json::Value,
|
||||
query: &VectorQueryRequest,
|
||||
) -> Result<Vec<serde_json::Value>> {
|
||||
Self::apply_query_params(&mut body, &query.base)?;
|
||||
) -> Result<()> {
|
||||
Self::apply_query_params(body, &query.base)?;
|
||||
|
||||
// Apply general parameters, before we dispatch based on number of query vectors.
|
||||
body["prefilter"] = query.base.prefilter.into();
|
||||
@@ -254,22 +254,21 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
0 => {
|
||||
// Server takes empty vector, not null or undefined.
|
||||
body["vector"] = serde_json::Value::Array(Vec::new());
|
||||
Ok(vec![body])
|
||||
}
|
||||
1 => {
|
||||
body["vector"] = vector_to_json(&query.query_vector[0])?;
|
||||
Ok(vec![body])
|
||||
}
|
||||
_ => {
|
||||
let mut bodies = Vec::with_capacity(query.query_vector.len());
|
||||
for vector in &query.query_vector {
|
||||
let mut body = body.clone();
|
||||
body["vector"] = vector_to_json(vector)?;
|
||||
bodies.push(body);
|
||||
}
|
||||
Ok(bodies)
|
||||
let vectors = query
|
||||
.query_vector
|
||||
.iter()
|
||||
.map(vector_to_json)
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
body["vector"] = serde_json::Value::Array(vectors);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn check_mutable(&self) -> Result<()> {
|
||||
@@ -294,7 +293,7 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
&self,
|
||||
query: &AnyQuery,
|
||||
_options: QueryExecutionOptions,
|
||||
) -> Result<Vec<Pin<Box<dyn RecordBatchStream + Send>>>> {
|
||||
) -> Result<Pin<Box<dyn RecordBatchStream + Send>>> {
|
||||
let request = self.client.post(&format!("/v1/table/{}/query/", self.name));
|
||||
|
||||
let version = self.current_version().await;
|
||||
@@ -305,28 +304,16 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
Self::apply_query_params(&mut body, query)?;
|
||||
// Empty vector can be passed if no vector search is performed.
|
||||
body["vector"] = serde_json::Value::Array(Vec::new());
|
||||
|
||||
let request = request.json(&body);
|
||||
|
||||
let (request_id, response) = self.client.send(request, true).await?;
|
||||
|
||||
let stream = self.read_arrow_stream(&request_id, response).await?;
|
||||
Ok(vec![stream])
|
||||
}
|
||||
AnyQuery::VectorQuery(query) => {
|
||||
let bodies = Self::apply_vector_query_params(body, query)?;
|
||||
let mut futures = Vec::with_capacity(bodies.len());
|
||||
for body in bodies {
|
||||
let request = request.try_clone().unwrap().json(&body);
|
||||
let future = async move {
|
||||
let (request_id, response) = self.client.send(request, true).await?;
|
||||
self.read_arrow_stream(&request_id, response).await
|
||||
};
|
||||
futures.push(future);
|
||||
}
|
||||
futures::future::try_join_all(futures).await
|
||||
Self::apply_vector_query_params(&mut body, query)?;
|
||||
}
|
||||
}
|
||||
|
||||
let request = request.json(&body);
|
||||
let (request_id, response) = self.client.send(request, true).await?;
|
||||
let stream = self.read_arrow_stream(&request_id, response).await?;
|
||||
Ok(stream)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -498,18 +485,8 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
query: &AnyQuery,
|
||||
options: QueryExecutionOptions,
|
||||
) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
let streams = self.execute_query(query, options).await?;
|
||||
|
||||
if streams.len() == 1 {
|
||||
let stream = streams.into_iter().next().unwrap();
|
||||
Ok(Arc::new(OneShotExec::new(stream)))
|
||||
} else {
|
||||
let stream_execs = streams
|
||||
.into_iter()
|
||||
.map(|stream| Arc::new(OneShotExec::new(stream)) as Arc<dyn ExecutionPlan>)
|
||||
.collect();
|
||||
Table::multi_vector_plan(stream_execs)
|
||||
}
|
||||
let stream = self.execute_query(query, options).await?;
|
||||
Ok(Arc::new(OneShotExec::new(stream)))
|
||||
}
|
||||
|
||||
async fn query(
|
||||
@@ -517,24 +494,8 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
query: &AnyQuery,
|
||||
_options: QueryExecutionOptions,
|
||||
) -> Result<DatasetRecordBatchStream> {
|
||||
let streams = self.execute_query(query, _options).await?;
|
||||
|
||||
if streams.len() == 1 {
|
||||
Ok(DatasetRecordBatchStream::new(
|
||||
streams.into_iter().next().unwrap(),
|
||||
))
|
||||
} else {
|
||||
let stream_execs = streams
|
||||
.into_iter()
|
||||
.map(|stream| Arc::new(OneShotExec::new(stream)) as Arc<dyn ExecutionPlan>)
|
||||
.collect();
|
||||
let plan = Table::multi_vector_plan(stream_execs)?;
|
||||
|
||||
Ok(DatasetRecordBatchStream::new(execute_plan(
|
||||
plan,
|
||||
Default::default(),
|
||||
)?))
|
||||
}
|
||||
let stream = self.execute_query(query, _options).await?;
|
||||
Ok(DatasetRecordBatchStream::new(stream))
|
||||
}
|
||||
async fn update(&self, update: UpdateBuilder) -> Result<u64> {
|
||||
self.check_mutable().await?;
|
||||
@@ -1500,9 +1461,21 @@ mod tests {
|
||||
request.headers().get("Content-Type").unwrap(),
|
||||
JSON_CONTENT_TYPE
|
||||
);
|
||||
let body: serde_json::Value =
|
||||
serde_json::from_slice(request.body().unwrap().as_bytes().unwrap()).unwrap();
|
||||
let query_vectors = body["vector"].as_array().unwrap();
|
||||
assert_eq!(query_vectors.len(), 2);
|
||||
assert_eq!(query_vectors[0].as_array().unwrap().len(), 3);
|
||||
assert_eq!(query_vectors[1].as_array().unwrap().len(), 3);
|
||||
let data = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
Arc::new(Schema::new(vec![
|
||||
Field::new("a", DataType::Int32, false),
|
||||
Field::new("query_index", DataType::Int32, false),
|
||||
])),
|
||||
vec![
|
||||
Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])),
|
||||
Arc::new(Int32Array::from(vec![0, 0, 0, 1, 1, 1])),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let response_body = write_ipc_file(&data);
|
||||
@@ -1519,8 +1492,6 @@ mod tests {
|
||||
.unwrap()
|
||||
.add_query_vector(vec![0.4, 0.5, 0.6])
|
||||
.unwrap();
|
||||
let plan = query.explain_plan(true).await.unwrap();
|
||||
assert!(plan.contains("UnionExec"), "Plan: {}", plan);
|
||||
|
||||
let results = query
|
||||
.execute()
|
||||
|
||||
@@ -1996,8 +1996,8 @@ impl BaseTable for NativeTable {
|
||||
};
|
||||
|
||||
let ds_ref = self.dataset.get().await?;
|
||||
let mut column = query.column.clone();
|
||||
let schema = ds_ref.schema();
|
||||
let mut column = query.column.clone();
|
||||
|
||||
let mut query_vector = query.query_vector.first().cloned();
|
||||
if query.query_vector.len() > 1 {
|
||||
|
||||
Reference in New Issue
Block a user