mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-03 18:32:55 +00:00
Compare commits
5 Commits
python-v0.
...
rmeng/pool
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cc7a503faa | ||
|
|
2ded17452b | ||
|
|
dfd9d2ac99 | ||
|
|
162880140e | ||
|
|
99d9ced6d5 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.13.0"
|
current_version = "0.13.1-beta.0"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.13.0-final.0</version>
|
<version>0.13.1-beta.0</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.13.0-final.0</version>
|
<version>0.13.1-beta.0</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<name>LanceDB Parent</name>
|
<name>LanceDB Parent</name>
|
||||||
|
|||||||
78
node/package-lock.json
generated
78
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -52,12 +52,14 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.13.0",
|
"@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.13.0",
|
"@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.13.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.13.0",
|
"@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-win32-arm64-msvc": "0.13.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.13.0"
|
"@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
|
||||||
|
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0",
|
||||||
|
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
@@ -327,66 +329,6 @@
|
|||||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
|
||||||
"version": "0.13.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.13.0.tgz",
|
|
||||||
"integrity": "sha512-8hdcjkRmgrdQYf1jN+DyZae40LIv8UUfnWy70Uid5qy63sSvRW/+MvIdqIPFr9QlLUXmpyyQuX0y3bZhUR99cQ==",
|
|
||||||
"cpu": [
|
|
||||||
"arm64"
|
|
||||||
],
|
|
||||||
"optional": true,
|
|
||||||
"os": [
|
|
||||||
"darwin"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
|
||||||
"version": "0.13.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.13.0.tgz",
|
|
||||||
"integrity": "sha512-fWzAY4l5SQtNfMYh80v+M66ugZHhdxbkpk5mNEv6Zsug3DL6kRj3Uv31/i0wgzY6F5G3LUlbjZerN+eTnDLwOw==",
|
|
||||||
"cpu": [
|
|
||||||
"x64"
|
|
||||||
],
|
|
||||||
"optional": true,
|
|
||||||
"os": [
|
|
||||||
"darwin"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
|
||||||
"version": "0.13.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.13.0.tgz",
|
|
||||||
"integrity": "sha512-ltwAT9baOSuR5YiGykQXPC8/HGYF13vpI47qxhP9yfgiz9pA8EUn8p8YrBRzq7J4DIZ4b8JSVDXQnMIqEtB4Kg==",
|
|
||||||
"cpu": [
|
|
||||||
"arm64"
|
|
||||||
],
|
|
||||||
"optional": true,
|
|
||||||
"os": [
|
|
||||||
"linux"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
|
||||||
"version": "0.13.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.13.0.tgz",
|
|
||||||
"integrity": "sha512-MiT/RBlMPGGRh7BX+MXwRuNiiUnKmuDcHH8nm88IH28T7TQxXIbA9w6UpSg5m9f3DgKQI2K8oLi29oKIB8ZwDQ==",
|
|
||||||
"cpu": [
|
|
||||||
"x64"
|
|
||||||
],
|
|
||||||
"optional": true,
|
|
||||||
"os": [
|
|
||||||
"linux"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
|
||||||
"version": "0.13.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.13.0.tgz",
|
|
||||||
"integrity": "sha512-SovP/hwWYLJIy65DKbVuXlBPTb/nwvVpTO6dh9zRch+L5ek6JmVAkwsfeTS2p5bMa8VPujsCXYUAVuCDEJU8wg==",
|
|
||||||
"cpu": [
|
|
||||||
"x64"
|
|
||||||
],
|
|
||||||
"optional": true,
|
|
||||||
"os": [
|
|
||||||
"win32"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"node_modules/@neon-rs/cli": {
|
"node_modules/@neon-rs/cli": {
|
||||||
"version": "0.0.160",
|
"version": "0.0.160",
|
||||||
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
|
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
@@ -84,18 +84,20 @@
|
|||||||
"aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
|
"aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
|
||||||
"x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
|
"x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
|
||||||
"aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
|
"aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
|
||||||
|
"x86_64-unknown-linux-musl": "@lancedb/vectordb-linux-x64-musl",
|
||||||
|
"aarch64-unknown-linux-musl": "@lancedb/vectordb-linux-arm64-musl",
|
||||||
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc",
|
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc",
|
||||||
"aarch64-pc-windows-msvc": "@lancedb/vectordb-win32-arm64-msvc"
|
"aarch64-pc-windows-msvc": "@lancedb/vectordb-win32-arm64-msvc"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-x64": "0.13.0",
|
"@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.13.0",
|
"@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.13.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.13.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-linux-x64-musl": "0.13.0",
|
"@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-linux-arm64-musl": "0.13.0",
|
"@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.13.0",
|
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0",
|
||||||
"@lancedb/vectordb-win32-arm64-msvc": "0.13.0"
|
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.13.0"
|
version = "0.13.1-beta.0"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"vector database",
|
"vector database",
|
||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"version": "0.13.0",
|
"version": "0.13.1-beta.0",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -17,11 +17,17 @@ crate-type = ["cdylib"]
|
|||||||
arrow = { version = "52.1", features = ["pyarrow"] }
|
arrow = { version = "52.1", features = ["pyarrow"] }
|
||||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||||
env_logger.workspace = true
|
env_logger.workspace = true
|
||||||
pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38", "gil-refs"] }
|
pyo3 = { version = "0.21", features = [
|
||||||
|
"extension-module",
|
||||||
|
"abi3-py39",
|
||||||
|
"gil-refs"
|
||||||
|
] }
|
||||||
# Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
|
# Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
|
||||||
# pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
|
# pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
|
||||||
pyo3-asyncio-0-21 = { version = "0.21.0", features = ["attributes", "tokio-runtime"] }
|
pyo3-asyncio-0-21 = { version = "0.21.0", features = [
|
||||||
|
"attributes",
|
||||||
|
"tokio-runtime"
|
||||||
|
] }
|
||||||
pin-project = "1.1.5"
|
pin-project = "1.1.5"
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
tokio = { version = "1.36.0", features = ["sync"] }
|
tokio = { version = "1.36.0", features = ["sync"] }
|
||||||
@@ -29,14 +35,13 @@ tokio = { version = "1.36.0", features = ["sync"] }
|
|||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
pyo3-build-config = { version = "0.20.3", features = [
|
pyo3-build-config = { version = "0.20.3", features = [
|
||||||
"extension-module",
|
"extension-module",
|
||||||
"abi3-py38",
|
"abi3-py39",
|
||||||
] }
|
] }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["default-tls", "remote"]
|
default = ["default-tls", "remote"]
|
||||||
fp16kernels = ["lancedb/fp16kernels"]
|
fp16kernels = ["lancedb/fp16kernels"]
|
||||||
remote = ["lancedb/remote"]
|
remote = ["lancedb/remote"]
|
||||||
|
|
||||||
# TLS
|
# TLS
|
||||||
default-tls = ["lancedb/default-tls"]
|
default-tls = ["lancedb/default-tls"]
|
||||||
native-tls = ["lancedb/native-tls"]
|
native-tls = ["lancedb/native-tls"]
|
||||||
|
|||||||
@@ -31,7 +31,6 @@ classifiers = [
|
|||||||
"Programming Language :: Python",
|
"Programming Language :: Python",
|
||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3",
|
||||||
"Programming Language :: Python :: 3 :: Only",
|
"Programming Language :: Python :: 3 :: Only",
|
||||||
"Programming Language :: Python :: 3.8",
|
|
||||||
"Programming Language :: Python :: 3.9",
|
"Programming Language :: Python :: 3.9",
|
||||||
"Programming Language :: Python :: 3.10",
|
"Programming Language :: Python :: 3.10",
|
||||||
"Programming Language :: Python :: 3.11",
|
"Programming Language :: Python :: 3.11",
|
||||||
|
|||||||
@@ -83,25 +83,33 @@ class OpenAIEmbeddings(TextEmbeddingFunction):
|
|||||||
"""
|
"""
|
||||||
openai = attempt_import_or_raise("openai")
|
openai = attempt_import_or_raise("openai")
|
||||||
|
|
||||||
|
valid_texts = []
|
||||||
|
valid_indices = []
|
||||||
|
for idx, text in enumerate(texts):
|
||||||
|
if text:
|
||||||
|
valid_texts.append(text)
|
||||||
|
valid_indices.append(idx)
|
||||||
|
|
||||||
# TODO retry, rate limit, token limit
|
# TODO retry, rate limit, token limit
|
||||||
try:
|
try:
|
||||||
if self.name == "text-embedding-ada-002":
|
kwargs = {
|
||||||
rs = self._openai_client.embeddings.create(input=texts, model=self.name)
|
"input": valid_texts,
|
||||||
else:
|
"model": self.name,
|
||||||
kwargs = {
|
}
|
||||||
"input": texts,
|
if self.name != "text-embedding-ada-002":
|
||||||
"model": self.name,
|
kwargs["dimensions"] = self.dim
|
||||||
}
|
|
||||||
if self.dim:
|
rs = self._openai_client.embeddings.create(**kwargs)
|
||||||
kwargs["dimensions"] = self.dim
|
valid_embeddings = {
|
||||||
rs = self._openai_client.embeddings.create(**kwargs)
|
idx: v.embedding for v, idx in zip(rs.data, valid_indices)
|
||||||
|
}
|
||||||
except openai.BadRequestError:
|
except openai.BadRequestError:
|
||||||
logging.exception("Bad request: %s", texts)
|
logging.exception("Bad request: %s", texts)
|
||||||
return [None] * len(texts)
|
return [None] * len(texts)
|
||||||
except Exception:
|
except Exception:
|
||||||
logging.exception("OpenAI embeddings error")
|
logging.exception("OpenAI embeddings error")
|
||||||
raise
|
raise
|
||||||
return [v.embedding for v in rs.data]
|
return [valid_embeddings.get(idx, None) for idx in range(len(texts))]
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def _openai_client(self):
|
def _openai_client(self):
|
||||||
|
|||||||
@@ -1,15 +1,5 @@
|
|||||||
# Copyright 2023 LanceDB Developers
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
"""Pydantic (v1 / v2) adapter for LanceDB"""
|
"""Pydantic (v1 / v2) adapter for LanceDB"""
|
||||||
|
|
||||||
@@ -30,6 +20,7 @@ from typing import (
|
|||||||
Type,
|
Type,
|
||||||
Union,
|
Union,
|
||||||
_GenericAlias,
|
_GenericAlias,
|
||||||
|
GenericAlias,
|
||||||
)
|
)
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -75,7 +66,7 @@ def vector(dim: int, value_type: pa.DataType = pa.float32()):
|
|||||||
|
|
||||||
|
|
||||||
def Vector(
|
def Vector(
|
||||||
dim: int, value_type: pa.DataType = pa.float32()
|
dim: int, value_type: pa.DataType = pa.float32(), nullable: bool = True
|
||||||
) -> Type[FixedSizeListMixin]:
|
) -> Type[FixedSizeListMixin]:
|
||||||
"""Pydantic Vector Type.
|
"""Pydantic Vector Type.
|
||||||
|
|
||||||
@@ -88,6 +79,8 @@ def Vector(
|
|||||||
The dimension of the vector.
|
The dimension of the vector.
|
||||||
value_type : pyarrow.DataType, optional
|
value_type : pyarrow.DataType, optional
|
||||||
The value type of the vector, by default pa.float32()
|
The value type of the vector, by default pa.float32()
|
||||||
|
nullable : bool, optional
|
||||||
|
Whether the vector is nullable, by default it is True.
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -103,7 +96,7 @@ def Vector(
|
|||||||
>>> assert schema == pa.schema([
|
>>> assert schema == pa.schema([
|
||||||
... pa.field("id", pa.int64(), False),
|
... pa.field("id", pa.int64(), False),
|
||||||
... pa.field("url", pa.utf8(), False),
|
... pa.field("url", pa.utf8(), False),
|
||||||
... pa.field("embeddings", pa.list_(pa.float32(), 768), False)
|
... pa.field("embeddings", pa.list_(pa.float32(), 768))
|
||||||
... ])
|
... ])
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -112,6 +105,10 @@ def Vector(
|
|||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"FixedSizeList(dim={dim})"
|
return f"FixedSizeList(dim={dim})"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def nullable() -> bool:
|
||||||
|
return nullable
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def dim() -> int:
|
def dim() -> int:
|
||||||
return dim
|
return dim
|
||||||
@@ -205,9 +202,7 @@ else:
|
|||||||
def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
|
def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
|
||||||
"""Convert a Pydantic FieldInfo to Arrow DataType"""
|
"""Convert a Pydantic FieldInfo to Arrow DataType"""
|
||||||
|
|
||||||
if isinstance(field.annotation, _GenericAlias) or (
|
if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
|
||||||
sys.version_info > (3, 9) and isinstance(field.annotation, types.GenericAlias)
|
|
||||||
):
|
|
||||||
origin = field.annotation.__origin__
|
origin = field.annotation.__origin__
|
||||||
args = field.annotation.__args__
|
args = field.annotation.__args__
|
||||||
if origin is list:
|
if origin is list:
|
||||||
@@ -235,7 +230,7 @@ def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
|
|||||||
|
|
||||||
def is_nullable(field: FieldInfo) -> bool:
|
def is_nullable(field: FieldInfo) -> bool:
|
||||||
"""Check if a Pydantic FieldInfo is nullable."""
|
"""Check if a Pydantic FieldInfo is nullable."""
|
||||||
if isinstance(field.annotation, _GenericAlias):
|
if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
|
||||||
origin = field.annotation.__origin__
|
origin = field.annotation.__origin__
|
||||||
args = field.annotation.__args__
|
args = field.annotation.__args__
|
||||||
if origin == Union:
|
if origin == Union:
|
||||||
@@ -246,6 +241,10 @@ def is_nullable(field: FieldInfo) -> bool:
|
|||||||
for typ in args:
|
for typ in args:
|
||||||
if typ is type(None):
|
if typ is type(None):
|
||||||
return True
|
return True
|
||||||
|
elif inspect.isclass(field.annotation) and issubclass(
|
||||||
|
field.annotation, FixedSizeListMixin
|
||||||
|
):
|
||||||
|
return field.annotation.nullable()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ from datetime import date, datetime
|
|||||||
from functools import singledispatch
|
from functools import singledispatch
|
||||||
from typing import Tuple, Union, Optional, Any
|
from typing import Tuple, Union, Optional, Any
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
from threading import Lock
|
||||||
|
from contextlib import contextmanager
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
@@ -314,3 +316,27 @@ def deprecated(func):
|
|||||||
def validate_table_name(name: str):
|
def validate_table_name(name: str):
|
||||||
"""Verify the table name is valid."""
|
"""Verify the table name is valid."""
|
||||||
native_validate_table_name(name)
|
native_validate_table_name(name)
|
||||||
|
|
||||||
|
|
||||||
|
class ConnectionPool:
|
||||||
|
def __init__(self, connection_factory, *, max_size: Optional[int] = None):
|
||||||
|
self.max_size = max_size
|
||||||
|
self._connection_factory = connection_factory
|
||||||
|
self._pool = []
|
||||||
|
self._lock = Lock()
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def connection(self):
|
||||||
|
with self._lock:
|
||||||
|
if self._pool:
|
||||||
|
conn = self._pool.pop()
|
||||||
|
else:
|
||||||
|
conn = self._connection_factory()
|
||||||
|
|
||||||
|
# release the lock before yielding
|
||||||
|
try:
|
||||||
|
yield conn
|
||||||
|
finally:
|
||||||
|
with self._lock:
|
||||||
|
if self.max_size is None or len(self._pool) < self.max_size:
|
||||||
|
self._pool.append(conn)
|
||||||
|
|||||||
@@ -90,10 +90,13 @@ def test_embedding_with_bad_results(tmp_path):
|
|||||||
self, texts: Union[List[str], np.ndarray]
|
self, texts: Union[List[str], np.ndarray]
|
||||||
) -> list[Union[np.array, None]]:
|
) -> list[Union[np.array, None]]:
|
||||||
# Return None, which is bad if field is non-nullable
|
# Return None, which is bad if field is non-nullable
|
||||||
return [
|
a = [
|
||||||
None if i % 2 == 0 else np.random.randn(self.ndims())
|
np.full(self.ndims(), np.nan)
|
||||||
|
if i % 2 == 0
|
||||||
|
else np.random.randn(self.ndims())
|
||||||
for i in range(len(texts))
|
for i in range(len(texts))
|
||||||
]
|
]
|
||||||
|
return a
|
||||||
|
|
||||||
db = lancedb.connect(tmp_path)
|
db = lancedb.connect(tmp_path)
|
||||||
registry = EmbeddingFunctionRegistry.get_instance()
|
registry = EmbeddingFunctionRegistry.get_instance()
|
||||||
|
|||||||
@@ -1,15 +1,6 @@
|
|||||||
# Copyright (c) 2023. LanceDB Developers
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
import importlib
|
import importlib
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
@@ -17,6 +8,7 @@ import os
|
|||||||
import lancedb
|
import lancedb
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
from lancedb.embeddings import get_registry
|
from lancedb.embeddings import get_registry
|
||||||
from lancedb.pydantic import LanceModel, Vector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
@@ -444,6 +436,30 @@ def test_watsonx_embedding(tmp_path):
|
|||||||
assert tbl.search("hello").limit(1).to_pandas()["text"][0] == "hello world"
|
assert tbl.search("hello").limit(1).to_pandas()["text"][0] == "hello world"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("OPENAI_API_KEY") is None, reason="OPENAI_API_KEY not set"
|
||||||
|
)
|
||||||
|
def test_openai_with_empty_strs(tmp_path):
|
||||||
|
model = get_registry().get("openai").create(max_retries=0)
|
||||||
|
|
||||||
|
class TextModel(LanceModel):
|
||||||
|
text: str = model.SourceField()
|
||||||
|
vector: Vector(model.ndims()) = model.VectorField()
|
||||||
|
|
||||||
|
df = pd.DataFrame({"text": ["hello world", ""]})
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
|
||||||
|
|
||||||
|
tbl.add(df, on_bad_vectors="skip")
|
||||||
|
tb = tbl.to_arrow()
|
||||||
|
assert tb.schema.field_by_name("vector").type == pa.list_(
|
||||||
|
pa.float32(), model.ndims()
|
||||||
|
)
|
||||||
|
assert len(tb) == 2
|
||||||
|
assert tb["vector"].is_null().to_pylist() == [False, True]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
importlib.util.find_spec("ollama") is None, reason="Ollama not installed"
|
importlib.util.find_spec("ollama") is None, reason="Ollama not installed"
|
||||||
|
|||||||
@@ -1,16 +1,5 @@
|
|||||||
# Copyright 2023 LanceDB Developers
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
@@ -172,6 +161,26 @@ def test_pydantic_to_arrow_py38():
|
|||||||
assert schema == expect_schema
|
assert schema == expect_schema
|
||||||
|
|
||||||
|
|
||||||
|
def test_nullable_vector():
|
||||||
|
class NullableModel(pydantic.BaseModel):
|
||||||
|
vec: Vector(16, nullable=False)
|
||||||
|
|
||||||
|
schema = pydantic_to_schema(NullableModel)
|
||||||
|
assert schema == pa.schema([pa.field("vec", pa.list_(pa.float32(), 16), False)])
|
||||||
|
|
||||||
|
class DefaultModel(pydantic.BaseModel):
|
||||||
|
vec: Vector(16)
|
||||||
|
|
||||||
|
schema = pydantic_to_schema(DefaultModel)
|
||||||
|
assert schema == pa.schema([pa.field("vec", pa.list_(pa.float32(), 16), True)])
|
||||||
|
|
||||||
|
class NotNullableModel(pydantic.BaseModel):
|
||||||
|
vec: Vector(16)
|
||||||
|
|
||||||
|
schema = pydantic_to_schema(NotNullableModel)
|
||||||
|
assert schema == pa.schema([pa.field("vec", pa.list_(pa.float32(), 16), True)])
|
||||||
|
|
||||||
|
|
||||||
def test_fixed_size_list_field():
|
def test_fixed_size_list_field():
|
||||||
class TestModel(pydantic.BaseModel):
|
class TestModel(pydantic.BaseModel):
|
||||||
vec: Vector(16)
|
vec: Vector(16)
|
||||||
@@ -192,7 +201,7 @@ def test_fixed_size_list_field():
|
|||||||
schema = pydantic_to_schema(TestModel)
|
schema = pydantic_to_schema(TestModel)
|
||||||
assert schema == pa.schema(
|
assert schema == pa.schema(
|
||||||
[
|
[
|
||||||
pa.field("vec", pa.list_(pa.float32(), 16), False),
|
pa.field("vec", pa.list_(pa.float32(), 16)),
|
||||||
pa.field("li", pa.list_(pa.int64()), False),
|
pa.field("li", pa.list_(pa.int64()), False),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -6,13 +6,16 @@ from datetime import timedelta
|
|||||||
import http.server
|
import http.server
|
||||||
import json
|
import json
|
||||||
import threading
|
import threading
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
import lancedb
|
import lancedb
|
||||||
from lancedb.conftest import MockTextEmbeddingFunction
|
from lancedb.conftest import MockTextEmbeddingFunction
|
||||||
from lancedb.remote import ClientConfig
|
from lancedb.remote import ClientConfig
|
||||||
|
from lancedb.util import ConnectionPool
|
||||||
from lancedb.remote.errors import HttpError, RetryError
|
from lancedb.remote.errors import HttpError, RetryError
|
||||||
|
import lancedb.util
|
||||||
import pytest
|
import pytest
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
|
|
||||||
@@ -55,6 +58,34 @@ def mock_lancedb_connection(handler):
|
|||||||
handle.join()
|
handle.join()
|
||||||
|
|
||||||
|
|
||||||
|
@contextlib.contextmanager
|
||||||
|
def mock_lancedb_connection_pool(handler):
|
||||||
|
with http.server.HTTPServer(
|
||||||
|
("localhost", 8080), make_mock_http_handler(handler)
|
||||||
|
) as server:
|
||||||
|
handle = threading.Thread(target=server.serve_forever)
|
||||||
|
handle.start()
|
||||||
|
|
||||||
|
def conn_factory():
|
||||||
|
lancedb.connect(
|
||||||
|
"db://dev",
|
||||||
|
api_key="fake",
|
||||||
|
host_override="http://localhost:8080",
|
||||||
|
client_config={
|
||||||
|
"retry_config": {"retries": 2},
|
||||||
|
"timeout_config": {
|
||||||
|
"connect_timeout": 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
yield ConnectionPool(conn_factory)
|
||||||
|
finally:
|
||||||
|
server.shutdown()
|
||||||
|
handle.join()
|
||||||
|
|
||||||
|
|
||||||
@contextlib.asynccontextmanager
|
@contextlib.asynccontextmanager
|
||||||
async def mock_lancedb_connection_async(handler):
|
async def mock_lancedb_connection_async(handler):
|
||||||
with http.server.HTTPServer(
|
with http.server.HTTPServer(
|
||||||
@@ -187,8 +218,7 @@ async def test_retry_error():
|
|||||||
assert cause.status_code == 429
|
assert cause.status_code == 429
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
def http_handler(query_handler):
|
||||||
def query_test_table(query_handler):
|
|
||||||
def handler(request):
|
def handler(request):
|
||||||
if request.path == "/v1/table/test/describe/":
|
if request.path == "/v1/table/test/describe/":
|
||||||
request.send_response(200)
|
request.send_response(200)
|
||||||
@@ -212,7 +242,12 @@ def query_test_table(query_handler):
|
|||||||
request.send_response(404)
|
request.send_response(404)
|
||||||
request.end_headers()
|
request.end_headers()
|
||||||
|
|
||||||
with mock_lancedb_connection(handler) as db:
|
return handler
|
||||||
|
|
||||||
|
|
||||||
|
@contextlib.contextmanager
|
||||||
|
def query_test_table(connection_ctx_mgr):
|
||||||
|
with connection_ctx_mgr as db:
|
||||||
assert repr(db) == "RemoteConnect(name=dev)"
|
assert repr(db) == "RemoteConnect(name=dev)"
|
||||||
table = db.open_table("test")
|
table = db.open_table("test")
|
||||||
assert repr(table) == "RemoteTable(dev.test)"
|
assert repr(table) == "RemoteTable(dev.test)"
|
||||||
@@ -220,6 +255,7 @@ def query_test_table(query_handler):
|
|||||||
|
|
||||||
|
|
||||||
def test_query_sync_minimal():
|
def test_query_sync_minimal():
|
||||||
|
@http_handler
|
||||||
def handler(body):
|
def handler(body):
|
||||||
assert body == {
|
assert body == {
|
||||||
"distance_type": "l2",
|
"distance_type": "l2",
|
||||||
@@ -234,13 +270,53 @@ def test_query_sync_minimal():
|
|||||||
|
|
||||||
return pa.table({"id": [1, 2, 3]})
|
return pa.table({"id": [1, 2, 3]})
|
||||||
|
|
||||||
with query_test_table(handler) as table:
|
with query_test_table(mock_lancedb_connection(handler)) as table:
|
||||||
|
data = table.search([1, 2, 3]).to_list()
|
||||||
|
expected = [{"id": 1}, {"id": 2}, {"id": 3}]
|
||||||
|
assert data == expected
|
||||||
|
|
||||||
|
with query_test_table(mock_lancedb_connection_pool(handler).connection()) as table:
|
||||||
data = table.search([1, 2, 3]).to_list()
|
data = table.search([1, 2, 3]).to_list()
|
||||||
expected = [{"id": 1}, {"id": 2}, {"id": 3}]
|
expected = [{"id": 1}, {"id": 2}, {"id": 3}]
|
||||||
assert data == expected
|
assert data == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_query_sync_minimal_threaded():
|
||||||
|
num_query = 0
|
||||||
|
|
||||||
|
@http_handler
|
||||||
|
def handler(body):
|
||||||
|
assert body == {
|
||||||
|
"distance_type": "l2",
|
||||||
|
"k": 10,
|
||||||
|
"prefilter": False,
|
||||||
|
"refine_factor": None,
|
||||||
|
"ef": None,
|
||||||
|
"vector": [1.0, 2.0, 3.0],
|
||||||
|
"nprobes": 20,
|
||||||
|
"version": None,
|
||||||
|
}
|
||||||
|
nonlocal num_query
|
||||||
|
num_query += 1
|
||||||
|
|
||||||
|
return pa.table({"id": [1, 2, 3]})
|
||||||
|
|
||||||
|
pool = mock_lancedb_connection_pool(handler)
|
||||||
|
|
||||||
|
def _query(i):
|
||||||
|
with query_test_table(pool.connection()) as table:
|
||||||
|
data = table.search([1, 2, 3]).to_list()
|
||||||
|
expected = [{"id": 1}, {"id": 2}, {"id": 3}]
|
||||||
|
assert data == expected
|
||||||
|
|
||||||
|
with ThreadPoolExecutor as exec:
|
||||||
|
exec.map(_query, range(1000))
|
||||||
|
|
||||||
|
assert num_query == 1000
|
||||||
|
|
||||||
|
|
||||||
def test_query_sync_empty_query():
|
def test_query_sync_empty_query():
|
||||||
|
@http_handler
|
||||||
def handler(body):
|
def handler(body):
|
||||||
assert body == {
|
assert body == {
|
||||||
"k": 10,
|
"k": 10,
|
||||||
@@ -252,7 +328,12 @@ def test_query_sync_empty_query():
|
|||||||
|
|
||||||
return pa.table({"id": [1, 2, 3]})
|
return pa.table({"id": [1, 2, 3]})
|
||||||
|
|
||||||
with query_test_table(handler) as table:
|
with query_test_table(mock_lancedb_connection(handler)) as table:
|
||||||
|
data = table.search(None).where("true").select(["id"]).limit(10).to_list()
|
||||||
|
expected = [{"id": 1}, {"id": 2}, {"id": 3}]
|
||||||
|
assert data == expected
|
||||||
|
|
||||||
|
with query_test_table(mock_lancedb_connection_pool(handler).connection()) as table:
|
||||||
data = table.search(None).where("true").select(["id"]).limit(10).to_list()
|
data = table.search(None).where("true").select(["id"]).limit(10).to_list()
|
||||||
expected = [{"id": 1}, {"id": 2}, {"id": 3}]
|
expected = [{"id": 1}, {"id": 2}, {"id": 3}]
|
||||||
assert data == expected
|
assert data == expected
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.13.0"
|
version = "0.13.1-beta.0"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.13.0"
|
version = "0.13.1-beta.0"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
Reference in New Issue
Block a user