Compare commits

..

5 Commits

Author SHA1 Message Date
rmeng
cc7a503faa feat: connection pool for sync client 2024-11-25 14:20:37 -05:00
Lei Xu
2ded17452b fix(python)!: handle bad openai embeddings gracefully (#1873)
BREAKING-CHANGE: change Pydantic Vector field to be nullable by default.
Closes #1577
2024-11-23 13:33:52 -08:00
Mr. Doge
dfd9d2ac99 ci: musl missing node/package.json targets (#1870)
I missed targets when manually merging draft PR to updated main
I was copying from:
https://github.com/lancedb/lancedb/pull/1816/files#diff-d6e19f28e97cfeda63a9bd9426f10f1d2454eeed375ee1235e8ba842ceeb46a0

fixes:
error: Rust target x86_64-unknown-linux-musl not found in package.json.
2024-11-22 10:40:59 -08:00
Lance Release
162880140e Updating package-lock.json 2024-11-21 21:53:25 +00:00
Lance Release
99d9ced6d5 Bump version: 0.13.0 → 0.13.1-beta.0 2024-11-21 21:53:01 +00:00
26 changed files with 250 additions and 160 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.13.0" current_version = "0.13.1-beta.0"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.13.0-final.0</version> <version>0.13.1-beta.0</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.13.0-final.0</version> <version>0.13.1-beta.0</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>LanceDB Parent</name> <name>LanceDB Parent</name>

78
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.13.0", "version": "0.13.1-beta.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "vectordb", "name": "vectordb",
"version": "0.13.0", "version": "0.13.1-beta.0",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"
@@ -52,12 +52,14 @@
"uuid": "^9.0.0" "uuid": "^9.0.0"
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.13.0", "@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
"@lancedb/vectordb-darwin-x64": "0.13.0", "@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.13.0", "@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.13.0", "@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
"@lancedb/vectordb-win32-arm64-msvc": "0.13.0", "@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.13.0" "@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0"
}, },
"peerDependencies": { "peerDependencies": {
"@apache-arrow/ts": "^14.0.2", "@apache-arrow/ts": "^14.0.2",
@@ -327,66 +329,6 @@
"@jridgewell/sourcemap-codec": "^1.4.10" "@jridgewell/sourcemap-codec": "^1.4.10"
} }
}, },
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.13.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.13.0.tgz",
"integrity": "sha512-8hdcjkRmgrdQYf1jN+DyZae40LIv8UUfnWy70Uid5qy63sSvRW/+MvIdqIPFr9QlLUXmpyyQuX0y3bZhUR99cQ==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.13.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.13.0.tgz",
"integrity": "sha512-fWzAY4l5SQtNfMYh80v+M66ugZHhdxbkpk5mNEv6Zsug3DL6kRj3Uv31/i0wgzY6F5G3LUlbjZerN+eTnDLwOw==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.13.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.13.0.tgz",
"integrity": "sha512-ltwAT9baOSuR5YiGykQXPC8/HGYF13vpI47qxhP9yfgiz9pA8EUn8p8YrBRzq7J4DIZ4b8JSVDXQnMIqEtB4Kg==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.13.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.13.0.tgz",
"integrity": "sha512-MiT/RBlMPGGRh7BX+MXwRuNiiUnKmuDcHH8nm88IH28T7TQxXIbA9w6UpSg5m9f3DgKQI2K8oLi29oKIB8ZwDQ==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.13.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.13.0.tgz",
"integrity": "sha512-SovP/hwWYLJIy65DKbVuXlBPTb/nwvVpTO6dh9zRch+L5ek6JmVAkwsfeTS2p5bMa8VPujsCXYUAVuCDEJU8wg==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"win32"
]
},
"node_modules/@neon-rs/cli": { "node_modules/@neon-rs/cli": {
"version": "0.0.160", "version": "0.0.160",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",

View File

@@ -1,6 +1,6 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.13.0", "version": "0.13.1-beta.0",
"description": " Serverless, low-latency vector database for AI applications", "description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",
@@ -84,18 +84,20 @@
"aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64", "aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
"x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu", "x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
"aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu", "aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
"x86_64-unknown-linux-musl": "@lancedb/vectordb-linux-x64-musl",
"aarch64-unknown-linux-musl": "@lancedb/vectordb-linux-arm64-musl",
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc", "x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc",
"aarch64-pc-windows-msvc": "@lancedb/vectordb-win32-arm64-msvc" "aarch64-pc-windows-msvc": "@lancedb/vectordb-win32-arm64-msvc"
} }
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.13.0", "@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
"@lancedb/vectordb-darwin-arm64": "0.13.0", "@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.13.0", "@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.13.0", "@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
"@lancedb/vectordb-linux-x64-musl": "0.13.0", "@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
"@lancedb/vectordb-linux-arm64-musl": "0.13.0", "@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.13.0", "@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0",
"@lancedb/vectordb-win32-arm64-msvc": "0.13.0" "@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0"
} }
} }

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "lancedb-nodejs" name = "lancedb-nodejs"
edition.workspace = true edition.workspace = true
version = "0.13.0" version = "0.13.1-beta.0"
license.workspace = true license.workspace = true
description.workspace = true description.workspace = true
repository.workspace = true repository.workspace = true

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-arm64", "name": "@lancedb/lancedb-darwin-arm64",
"version": "0.13.0", "version": "0.13.1-beta.0",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node", "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-x64", "name": "@lancedb/lancedb-darwin-x64",
"version": "0.13.0", "version": "0.13.1-beta.0",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.darwin-x64.node", "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-gnu", "name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.13.0", "version": "0.13.1-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node", "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-musl", "name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.13.0", "version": "0.13.1-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node", "main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-gnu", "name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.13.0", "version": "0.13.1-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node", "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-musl", "name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.13.0", "version": "0.13.1-beta.0",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node", "main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-arm64-msvc", "name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.13.0", "version": "0.13.1-beta.0",
"os": [ "os": [
"win32" "win32"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-x64-msvc", "name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.13.0", "version": "0.13.1-beta.0",
"os": ["win32"], "os": ["win32"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node", "main": "lancedb.win32-x64-msvc.node",

View File

@@ -10,7 +10,7 @@
"vector database", "vector database",
"ann" "ann"
], ],
"version": "0.13.0", "version": "0.13.1-beta.0",
"main": "dist/index.js", "main": "dist/index.js",
"exports": { "exports": {
".": "./dist/index.js", ".": "./dist/index.js",

View File

@@ -17,11 +17,17 @@ crate-type = ["cdylib"]
arrow = { version = "52.1", features = ["pyarrow"] } arrow = { version = "52.1", features = ["pyarrow"] }
lancedb = { path = "../rust/lancedb", default-features = false } lancedb = { path = "../rust/lancedb", default-features = false }
env_logger.workspace = true env_logger.workspace = true
pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38", "gil-refs"] } pyo3 = { version = "0.21", features = [
"extension-module",
"abi3-py39",
"gil-refs"
] }
# Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119 # Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
# pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] } # pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
pyo3-asyncio-0-21 = { version = "0.21.0", features = ["attributes", "tokio-runtime"] } pyo3-asyncio-0-21 = { version = "0.21.0", features = [
"attributes",
"tokio-runtime"
] }
pin-project = "1.1.5" pin-project = "1.1.5"
futures.workspace = true futures.workspace = true
tokio = { version = "1.36.0", features = ["sync"] } tokio = { version = "1.36.0", features = ["sync"] }
@@ -29,14 +35,13 @@ tokio = { version = "1.36.0", features = ["sync"] }
[build-dependencies] [build-dependencies]
pyo3-build-config = { version = "0.20.3", features = [ pyo3-build-config = { version = "0.20.3", features = [
"extension-module", "extension-module",
"abi3-py38", "abi3-py39",
] } ] }
[features] [features]
default = ["default-tls", "remote"] default = ["default-tls", "remote"]
fp16kernels = ["lancedb/fp16kernels"] fp16kernels = ["lancedb/fp16kernels"]
remote = ["lancedb/remote"] remote = ["lancedb/remote"]
# TLS # TLS
default-tls = ["lancedb/default-tls"] default-tls = ["lancedb/default-tls"]
native-tls = ["lancedb/native-tls"] native-tls = ["lancedb/native-tls"]

View File

@@ -31,7 +31,6 @@ classifiers = [
"Programming Language :: Python", "Programming Language :: Python",
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.11",

View File

@@ -83,25 +83,33 @@ class OpenAIEmbeddings(TextEmbeddingFunction):
""" """
openai = attempt_import_or_raise("openai") openai = attempt_import_or_raise("openai")
valid_texts = []
valid_indices = []
for idx, text in enumerate(texts):
if text:
valid_texts.append(text)
valid_indices.append(idx)
# TODO retry, rate limit, token limit # TODO retry, rate limit, token limit
try: try:
if self.name == "text-embedding-ada-002": kwargs = {
rs = self._openai_client.embeddings.create(input=texts, model=self.name) "input": valid_texts,
else: "model": self.name,
kwargs = { }
"input": texts, if self.name != "text-embedding-ada-002":
"model": self.name, kwargs["dimensions"] = self.dim
}
if self.dim: rs = self._openai_client.embeddings.create(**kwargs)
kwargs["dimensions"] = self.dim valid_embeddings = {
rs = self._openai_client.embeddings.create(**kwargs) idx: v.embedding for v, idx in zip(rs.data, valid_indices)
}
except openai.BadRequestError: except openai.BadRequestError:
logging.exception("Bad request: %s", texts) logging.exception("Bad request: %s", texts)
return [None] * len(texts) return [None] * len(texts)
except Exception: except Exception:
logging.exception("OpenAI embeddings error") logging.exception("OpenAI embeddings error")
raise raise
return [v.embedding for v in rs.data] return [valid_embeddings.get(idx, None) for idx in range(len(texts))]
@cached_property @cached_property
def _openai_client(self): def _openai_client(self):

View File

@@ -1,15 +1,5 @@
# Copyright 2023 LanceDB Developers # SPDX-License-Identifier: Apache-2.0
# # SPDX-FileCopyrightText: Copyright The LanceDB Authors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pydantic (v1 / v2) adapter for LanceDB""" """Pydantic (v1 / v2) adapter for LanceDB"""
@@ -30,6 +20,7 @@ from typing import (
Type, Type,
Union, Union,
_GenericAlias, _GenericAlias,
GenericAlias,
) )
import numpy as np import numpy as np
@@ -75,7 +66,7 @@ def vector(dim: int, value_type: pa.DataType = pa.float32()):
def Vector( def Vector(
dim: int, value_type: pa.DataType = pa.float32() dim: int, value_type: pa.DataType = pa.float32(), nullable: bool = True
) -> Type[FixedSizeListMixin]: ) -> Type[FixedSizeListMixin]:
"""Pydantic Vector Type. """Pydantic Vector Type.
@@ -88,6 +79,8 @@ def Vector(
The dimension of the vector. The dimension of the vector.
value_type : pyarrow.DataType, optional value_type : pyarrow.DataType, optional
The value type of the vector, by default pa.float32() The value type of the vector, by default pa.float32()
nullable : bool, optional
Whether the vector is nullable, by default it is True.
Examples Examples
-------- --------
@@ -103,7 +96,7 @@ def Vector(
>>> assert schema == pa.schema([ >>> assert schema == pa.schema([
... pa.field("id", pa.int64(), False), ... pa.field("id", pa.int64(), False),
... pa.field("url", pa.utf8(), False), ... pa.field("url", pa.utf8(), False),
... pa.field("embeddings", pa.list_(pa.float32(), 768), False) ... pa.field("embeddings", pa.list_(pa.float32(), 768))
... ]) ... ])
""" """
@@ -112,6 +105,10 @@ def Vector(
def __repr__(self): def __repr__(self):
return f"FixedSizeList(dim={dim})" return f"FixedSizeList(dim={dim})"
@staticmethod
def nullable() -> bool:
return nullable
@staticmethod @staticmethod
def dim() -> int: def dim() -> int:
return dim return dim
@@ -205,9 +202,7 @@ else:
def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType: def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
"""Convert a Pydantic FieldInfo to Arrow DataType""" """Convert a Pydantic FieldInfo to Arrow DataType"""
if isinstance(field.annotation, _GenericAlias) or ( if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
sys.version_info > (3, 9) and isinstance(field.annotation, types.GenericAlias)
):
origin = field.annotation.__origin__ origin = field.annotation.__origin__
args = field.annotation.__args__ args = field.annotation.__args__
if origin is list: if origin is list:
@@ -235,7 +230,7 @@ def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
def is_nullable(field: FieldInfo) -> bool: def is_nullable(field: FieldInfo) -> bool:
"""Check if a Pydantic FieldInfo is nullable.""" """Check if a Pydantic FieldInfo is nullable."""
if isinstance(field.annotation, _GenericAlias): if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
origin = field.annotation.__origin__ origin = field.annotation.__origin__
args = field.annotation.__args__ args = field.annotation.__args__
if origin == Union: if origin == Union:
@@ -246,6 +241,10 @@ def is_nullable(field: FieldInfo) -> bool:
for typ in args: for typ in args:
if typ is type(None): if typ is type(None):
return True return True
elif inspect.isclass(field.annotation) and issubclass(
field.annotation, FixedSizeListMixin
):
return field.annotation.nullable()
return False return False

View File

@@ -11,6 +11,8 @@ from datetime import date, datetime
from functools import singledispatch from functools import singledispatch
from typing import Tuple, Union, Optional, Any from typing import Tuple, Union, Optional, Any
from urllib.parse import urlparse from urllib.parse import urlparse
from threading import Lock
from contextlib import contextmanager
import numpy as np import numpy as np
import pyarrow as pa import pyarrow as pa
@@ -314,3 +316,27 @@ def deprecated(func):
def validate_table_name(name: str): def validate_table_name(name: str):
"""Verify the table name is valid.""" """Verify the table name is valid."""
native_validate_table_name(name) native_validate_table_name(name)
class ConnectionPool:
    """A minimal thread-safe pool of reusable connections.

    Connections are created lazily by calling ``connection_factory`` whenever
    no idle connection is available. ``max_size`` bounds the number of *idle*
    connections kept for reuse; it does not limit how many connections may be
    checked out at the same time.

    Parameters
    ----------
    connection_factory : callable
        Zero-argument callable that returns a new connection object.
    max_size : int, optional
        Maximum number of idle connections retained for reuse. ``None`` (the
        default) retains every connection that is handed back.
    """

    def __init__(self, connection_factory, *, max_size: Optional[int] = None):
        self.max_size = max_size
        self._connection_factory = connection_factory
        self._pool = []
        self._lock = Lock()

    @contextmanager
    def connection(self):
        """Check out a connection for the duration of a ``with`` block.

        Yields
        ------
        The most recently returned idle connection if one exists, otherwise a
        new connection produced by the factory.

        On exit (normal or via an exception) the connection is put back in the
        pool if there is room. A connection that does not fit is closed when
        it exposes a callable ``close`` attribute, instead of being dropped
        while still open.
        """
        with self._lock:
            reused = bool(self._pool)
            conn = self._pool.pop() if reused else None
        if not reused:
            # Build new connections outside the lock: the lock is not
            # reentrant, and holding it here would serialise every thread
            # behind a potentially slow factory call.
            conn = self._connection_factory()
        try:
            yield conn
        finally:
            with self._lock:
                keep = self.max_size is None or len(self._pool) < self.max_size
                if keep:
                    self._pool.append(conn)
            if not keep:
                # The pool owns connections it created; release the ones it
                # cannot retain. Done outside the lock for the same reason as
                # the factory call above.
                close = getattr(conn, "close", None)
                if callable(close):
                    close()

View File

@@ -90,10 +90,13 @@ def test_embedding_with_bad_results(tmp_path):
self, texts: Union[List[str], np.ndarray] self, texts: Union[List[str], np.ndarray]
) -> list[Union[np.array, None]]: ) -> list[Union[np.array, None]]:
# Return None, which is bad if field is non-nullable # Return None, which is bad if field is non-nullable
return [ a = [
None if i % 2 == 0 else np.random.randn(self.ndims()) np.full(self.ndims(), np.nan)
if i % 2 == 0
else np.random.randn(self.ndims())
for i in range(len(texts)) for i in range(len(texts))
] ]
return a
db = lancedb.connect(tmp_path) db = lancedb.connect(tmp_path)
registry = EmbeddingFunctionRegistry.get_instance() registry = EmbeddingFunctionRegistry.get_instance()

View File

@@ -1,15 +1,6 @@
# Copyright (c) 2023. LanceDB Developers # SPDX-License-Identifier: Apache-2.0
# # SPDX-FileCopyrightText: Copyright The LanceDB Authors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib import importlib
import io import io
import os import os
@@ -17,6 +8,7 @@ import os
import lancedb import lancedb
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import pyarrow as pa
import pytest import pytest
from lancedb.embeddings import get_registry from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector from lancedb.pydantic import LanceModel, Vector
@@ -444,6 +436,30 @@ def test_watsonx_embedding(tmp_path):
assert tbl.search("hello").limit(1).to_pandas()["text"][0] == "hello world" assert tbl.search("hello").limit(1).to_pandas()["text"][0] == "hello world"
@pytest.mark.slow
@pytest.mark.skipif(
    os.environ.get("OPENAI_API_KEY") is None, reason="OPENAI_API_KEY not set"
)
def test_openai_with_empty_strs(tmp_path):
    """Empty source strings must be stored with a null vector, not rejected.

    Adds one real text and one empty string through the OpenAI embedding
    function and checks that both rows are kept, the vector column keeps its
    fixed-size-list type, and only the empty-string row has a null vector.
    """
    model = get_registry().get("openai").create(max_retries=0)

    class TextModel(LanceModel):
        text: str = model.SourceField()
        vector: Vector(model.ndims()) = model.VectorField()

    df = pd.DataFrame({"text": ["hello world", ""]})
    db = lancedb.connect(tmp_path)
    tbl = db.create_table("test", schema=TextModel, mode="overwrite")

    tbl.add(df, on_bad_vectors="skip")
    tb = tbl.to_arrow()
    # Schema.field() is the supported accessor; field_by_name() is deprecated.
    assert tb.schema.field("vector").type == pa.list_(
        pa.float32(), model.ndims()
    )
    assert len(tb) == 2
    assert tb["vector"].is_null().to_pylist() == [False, True]
@pytest.mark.slow @pytest.mark.slow
@pytest.mark.skipif( @pytest.mark.skipif(
importlib.util.find_spec("ollama") is None, reason="Ollama not installed" importlib.util.find_spec("ollama") is None, reason="Ollama not installed"

View File

@@ -1,16 +1,5 @@
# Copyright 2023 LanceDB Developers # SPDX-License-Identifier: Apache-2.0
# # SPDX-FileCopyrightText: Copyright The LanceDB Authors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json import json
import sys import sys
@@ -172,6 +161,26 @@ def test_pydantic_to_arrow_py38():
assert schema == expect_schema assert schema == expect_schema
def test_nullable_vector():
    """Vector fields are nullable in Arrow unless ``nullable=False`` is given.

    Covers the three ways of declaring a Vector: explicitly non-nullable,
    default, and explicitly nullable.
    """
    vec_type = pa.list_(pa.float32(), 16)

    class NotNullableModel(pydantic.BaseModel):
        vec: Vector(16, nullable=False)

    schema = pydantic_to_schema(NotNullableModel)
    assert schema == pa.schema([pa.field("vec", vec_type, False)])

    class DefaultModel(pydantic.BaseModel):
        vec: Vector(16)

    schema = pydantic_to_schema(DefaultModel)
    assert schema == pa.schema([pa.field("vec", vec_type, True)])

    class NullableModel(pydantic.BaseModel):
        vec: Vector(16, nullable=True)

    schema = pydantic_to_schema(NullableModel)
    assert schema == pa.schema([pa.field("vec", vec_type, True)])
def test_fixed_size_list_field(): def test_fixed_size_list_field():
class TestModel(pydantic.BaseModel): class TestModel(pydantic.BaseModel):
vec: Vector(16) vec: Vector(16)
@@ -192,7 +201,7 @@ def test_fixed_size_list_field():
schema = pydantic_to_schema(TestModel) schema = pydantic_to_schema(TestModel)
assert schema == pa.schema( assert schema == pa.schema(
[ [
pa.field("vec", pa.list_(pa.float32(), 16), False), pa.field("vec", pa.list_(pa.float32(), 16)),
pa.field("li", pa.list_(pa.int64()), False), pa.field("li", pa.list_(pa.int64()), False),
] ]
) )

View File

@@ -6,13 +6,16 @@ from datetime import timedelta
import http.server import http.server
import json import json
import threading import threading
from concurrent.futures import ThreadPoolExecutor
from unittest.mock import MagicMock from unittest.mock import MagicMock
import uuid import uuid
import lancedb import lancedb
from lancedb.conftest import MockTextEmbeddingFunction from lancedb.conftest import MockTextEmbeddingFunction
from lancedb.remote import ClientConfig from lancedb.remote import ClientConfig
from lancedb.util import ConnectionPool
from lancedb.remote.errors import HttpError, RetryError from lancedb.remote.errors import HttpError, RetryError
import lancedb.util
import pytest import pytest
import pyarrow as pa import pyarrow as pa
@@ -55,6 +58,34 @@ def mock_lancedb_connection(handler):
handle.join() handle.join()
@contextlib.contextmanager
def mock_lancedb_connection_pool(handler):
    """Run a mock HTTP server and yield a ConnectionPool that targets it.

    The server listens on localhost:8080 and serves requests with ``handler``
    from a background thread. Each connection the pool creates is a
    ``lancedb.connect`` remote connection pointed at that server. The server
    is shut down and its thread joined when the ``with`` block exits.
    """
    with http.server.HTTPServer(
        ("localhost", 8080), make_mock_http_handler(handler)
    ) as server:
        handle = threading.Thread(target=server.serve_forever)
        handle.start()

        def conn_factory():
            # The pool hands out whatever the factory returns, so the new
            # connection has to be returned explicitly.
            return lancedb.connect(
                "db://dev",
                api_key="fake",
                host_override="http://localhost:8080",
                client_config={
                    "retry_config": {"retries": 2},
                    "timeout_config": {
                        "connect_timeout": 1,
                    },
                },
            )

        try:
            yield ConnectionPool(conn_factory)
        finally:
            server.shutdown()
            handle.join()
@contextlib.asynccontextmanager @contextlib.asynccontextmanager
async def mock_lancedb_connection_async(handler): async def mock_lancedb_connection_async(handler):
with http.server.HTTPServer( with http.server.HTTPServer(
@@ -187,8 +218,7 @@ async def test_retry_error():
assert cause.status_code == 429 assert cause.status_code == 429
@contextlib.contextmanager def http_handler(query_handler):
def query_test_table(query_handler):
def handler(request): def handler(request):
if request.path == "/v1/table/test/describe/": if request.path == "/v1/table/test/describe/":
request.send_response(200) request.send_response(200)
@@ -212,7 +242,12 @@ def query_test_table(query_handler):
request.send_response(404) request.send_response(404)
request.end_headers() request.end_headers()
with mock_lancedb_connection(handler) as db: return handler
@contextlib.contextmanager
def query_test_table(connection_ctx_mgr):
with connection_ctx_mgr as db:
assert repr(db) == "RemoteConnect(name=dev)" assert repr(db) == "RemoteConnect(name=dev)"
table = db.open_table("test") table = db.open_table("test")
assert repr(table) == "RemoteTable(dev.test)" assert repr(table) == "RemoteTable(dev.test)"
@@ -220,6 +255,7 @@ def query_test_table(query_handler):
def test_query_sync_minimal(): def test_query_sync_minimal():
@http_handler
def handler(body): def handler(body):
assert body == { assert body == {
"distance_type": "l2", "distance_type": "l2",
@@ -234,13 +270,53 @@ def test_query_sync_minimal():
return pa.table({"id": [1, 2, 3]}) return pa.table({"id": [1, 2, 3]})
with query_test_table(handler) as table: with query_test_table(mock_lancedb_connection(handler)) as table:
data = table.search([1, 2, 3]).to_list()
expected = [{"id": 1}, {"id": 2}, {"id": 3}]
assert data == expected
with query_test_table(mock_lancedb_connection_pool(handler).connection()) as table:
data = table.search([1, 2, 3]).to_list() data = table.search([1, 2, 3]).to_list()
expected = [{"id": 1}, {"id": 2}, {"id": 3}] expected = [{"id": 1}, {"id": 2}, {"id": 3}]
assert data == expected assert data == expected
def test_query_sync_minimal_threaded():
    """Run 1000 minimal vector queries through a shared ConnectionPool.

    Worker threads each borrow a connection from the pool, run the query and
    check the rows they get back. The mock server counts the query requests
    it receives, and the test expects exactly one per submitted query.
    """
    num_query = 0

    @http_handler
    def handler(body):
        nonlocal num_query
        assert body == {
            "distance_type": "l2",
            "k": 10,
            "prefilter": False,
            "refine_factor": None,
            "ef": None,
            "vector": [1.0, 2.0, 3.0],
            "nprobes": 20,
            "version": None,
        }
        # NOTE(review): unsynchronised increment; assumes the mock server
        # handles one request at a time - confirm.
        num_query += 1

        return pa.table({"id": [1, 2, 3]})

    # The helper is a context manager: entering it starts the server and
    # yields the pool; using its return value directly would not.
    with mock_lancedb_connection_pool(handler) as pool:

        def _query(i):
            with query_test_table(pool.connection()) as table:
                data = table.search([1, 2, 3]).to_list()
                expected = [{"id": 1}, {"id": 2}, {"id": 3}]
                assert data == expected

        with ThreadPoolExecutor() as executor:
            # Consume the iterator so an exception raised in any worker
            # (including a failed assert) is re-raised here.
            list(executor.map(_query, range(1000)))

    assert num_query == 1000
def test_query_sync_empty_query(): def test_query_sync_empty_query():
@http_handler
def handler(body): def handler(body):
assert body == { assert body == {
"k": 10, "k": 10,
@@ -252,7 +328,12 @@ def test_query_sync_empty_query():
return pa.table({"id": [1, 2, 3]}) return pa.table({"id": [1, 2, 3]})
with query_test_table(handler) as table: with query_test_table(mock_lancedb_connection(handler)) as table:
data = table.search(None).where("true").select(["id"]).limit(10).to_list()
expected = [{"id": 1}, {"id": 2}, {"id": 3}]
assert data == expected
with query_test_table(mock_lancedb_connection_pool(handler).connection()) as table:
data = table.search(None).where("true").select(["id"]).limit(10).to_list() data = table.search(None).where("true").select(["id"]).limit(10).to_list()
expected = [{"id": 1}, {"id": 2}, {"id": 3}] expected = [{"id": 1}, {"id": 2}, {"id": 3}]
assert data == expected assert data == expected

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-node" name = "lancedb-node"
version = "0.13.0" version = "0.13.1-beta.0"
description = "Serverless, low-latency vector database for AI applications" description = "Serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true
edition.workspace = true edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.13.0" version = "0.13.1-beta.0"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true