mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
Compare commits
13 Commits
v0.15.0-be
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
995bd9bf37 | ||
|
|
36cc06697f | ||
|
|
35da464591 | ||
|
|
31f9c30ffb | ||
|
|
92dcf24b0c | ||
|
|
6b0adba2d9 | ||
|
|
66cbf6b6c5 | ||
|
|
ce9506db71 | ||
|
|
b66cd943a7 | ||
|
|
d8d11f48e7 | ||
|
|
7ec5df3022 | ||
|
|
b17304172c | ||
|
|
fbe5408434 |
22
Cargo.toml
22
Cargo.toml
@@ -21,16 +21,14 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.78.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.21.1", "features" = [
|
||||
"dynamodb",
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.21.1-beta.2" }
|
||||
lance-io = { version = "=0.21.1", git = "https://github.com/lancedb/lance.git", tag = "v0.21.1-beta.2" }
|
||||
lance-index = { version = "=0.21.1", git = "https://github.com/lancedb/lance.git", tag = "v0.21.1-beta.2" }
|
||||
lance-linalg = { version = "=0.21.1", git = "https://github.com/lancedb/lance.git", tag = "v0.21.1-beta.2" }
|
||||
lance-table = { version = "=0.21.1", git = "https://github.com/lancedb/lance.git", tag = "v0.21.1-beta.2" }
|
||||
lance-testing = { version = "=0.21.1", git = "https://github.com/lancedb/lance.git", tag = "v0.21.1-beta.2" }
|
||||
lance-datafusion = { version = "=0.21.1", git = "https://github.com/lancedb/lance.git", tag = "v0.21.1-beta.2" }
|
||||
lance-encoding = { version = "=0.21.1", git = "https://github.com/lancedb/lance.git", tag = "v0.21.1-beta.2" }
|
||||
lance = { "version" = "=0.22.0", "features" = ["dynamodb"] }
|
||||
lance-io = "=0.22.0"
|
||||
lance-index = "=0.22.0"
|
||||
lance-linalg = "=0.22.0"
|
||||
lance-table = "=0.22.0"
|
||||
lance-testing = "=0.22.0"
|
||||
lance-datafusion = "=0.22.0"
|
||||
lance-encoding = "=0.22.0"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "53.2", optional = false }
|
||||
arrow-array = "53.2"
|
||||
@@ -42,8 +40,8 @@ arrow-arith = "53.2"
|
||||
arrow-cast = "53.2"
|
||||
async-trait = "0"
|
||||
chrono = "0.4.35"
|
||||
datafusion-common = "42.0"
|
||||
datafusion-physical-plan = "42.0"
|
||||
datafusion-common = "44.0"
|
||||
datafusion-physical-plan = "44.0"
|
||||
env_logger = "0.10"
|
||||
half = { "version" = "=2.4.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
|
||||
@@ -12,7 +12,7 @@ with open("Cargo.toml", "rb") as f:
|
||||
elif isinstance(dep, dict):
|
||||
# Version doesn't have the beta tag in it, so we instead look
|
||||
# at the git tag.
|
||||
version = dep["tag"]
|
||||
version = dep.get('tag', dep.get('version'))
|
||||
else:
|
||||
raise ValueError("Unexpected type for dependency: " + str(dep))
|
||||
|
||||
|
||||
BIN
docs/src/assets/maxsim.png
Normal file
BIN
docs/src/assets/maxsim.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 10 KiB |
@@ -7,7 +7,7 @@ Approximate Nearest Neighbor (ANN) search is a method for finding data points ne
|
||||
There are three main types of ANN search algorithms:
|
||||
|
||||
* **Tree-based search algorithms**: Use a tree structure to organize and store data points.
|
||||
* * **Hash-based search algorithms**: Use a specialized geometric hash table to store and manage data points. These algorithms typically focus on theoretical guarantees, and don't usually perform as well as the other approaches in practice.
|
||||
* **Hash-based search algorithms**: Use a specialized geometric hash table to store and manage data points. These algorithms typically focus on theoretical guarantees, and don't usually perform as well as the other approaches in practice.
|
||||
* **Graph-based search algorithms**: Use a graph structure to store data points, which can be a bit complex.
|
||||
|
||||
HNSW is a graph-based algorithm. All graph-based search algorithms rely on the idea of a k-nearest neighbor (or k-approximate nearest neighbor) graph, which we outline below.
|
||||
|
||||
@@ -138,6 +138,36 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi
|
||||
--8<-- "python/python/tests/docs/test_binary_vector.py:async_binary_vector"
|
||||
```
|
||||
|
||||
## Multivector type
|
||||
|
||||
LanceDB supports multivector type, this is useful when you have multiple vectors for a single item (e.g. with ColBert and ColPali).
|
||||
|
||||
You can index on a column with multivector type and search on it, the query can be single vector or multiple vectors. If the query is multiple vectors `mq`, the similarity (distance) from it to any multivector `mv` in the dataset, is defined as:
|
||||
|
||||

|
||||
|
||||
where `sim` is the similarity function (e.g. cosine).
|
||||
|
||||
For now, only `cosine` metric is supported for multivector search.
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_multivector.py:imports"
|
||||
|
||||
--8<-- "python/python/tests/docs/test_multivector.py:sync_multivector"
|
||||
```
|
||||
|
||||
=== "async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_multivector.py:imports"
|
||||
|
||||
--8<-- "python/python/tests/docs/test_multivector.py:async_multivector"
|
||||
```
|
||||
|
||||
## Search with distance range
|
||||
|
||||
You can also search for vectors within a specific distance range from the query vector. This is useful when you want to find vectors that are not just the nearest neighbors, but also those that are within a certain distance. This can be done by using the `distance_range` method.
|
||||
|
||||
@@ -18,7 +18,7 @@ import numpy as np
|
||||
|
||||
uri = "data/sample-lancedb"
|
||||
data = [{"vector": row, "item": f"item {i}", "id": i}
|
||||
for i, row in enumerate(np.random.random((10_000, 2)).astype('int'))]
|
||||
for i, row in enumerate(np.random.random((10_000, 2)))]
|
||||
|
||||
# Synchronous client
|
||||
db = lancedb.connect(uri)
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
|
||||
A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb).
|
||||
|
||||
**DEPRECATED: This library is deprecated. Please use the new client,
|
||||
[@lancedb/lancedb](https://www.npmjs.com/package/@lancedb/lancedb).**
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
|
||||
75
node/package-lock.json
generated
75
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.14.2-beta.0",
|
||||
"version": "0.15.0-beta.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.14.2-beta.0",
|
||||
"version": "0.15.0-beta.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -52,14 +52,14 @@
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.14.2-beta.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.14.2-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.14.2-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.14.2-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.14.2-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.14.2-beta.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.14.2-beta.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.14.2-beta.0"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.15.0-beta.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.15.0-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.15.0-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.15.0-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.15.0-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.15.0-beta.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.15.0-beta.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.15.0-beta.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@apache-arrow/ts": "^14.0.2",
|
||||
@@ -330,9 +330,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.14.2-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.14.2-beta.0.tgz",
|
||||
"integrity": "sha512-nsXOl9M8jhsr/LrfvrVHiuWWj/zX3zU2Aahpw8etjJbnU83nmO1r9agPxN6mD/J60EsLP3gDaiRPaFY66pHScA==",
|
||||
"version": "0.15.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.15.0-beta.0.tgz",
|
||||
"integrity": "sha512-4sPAW4p1YFVfURyf0k017l6LRCz+VmN9fVUBy7W27b6EOQ3xuIb3t5xq3JAtslMPWBP3wxP8rKXXDmlbqDg3+g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -343,9 +343,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.14.2-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.14.2-beta.0.tgz",
|
||||
"integrity": "sha512-E1ouo0EfGaxG26YWnw717vaHGNLulmqzh6eaTQuj45Vd4GaPj07TJygtDyvMFBJdsZjdY5YIc9U8yIem1NfeKQ==",
|
||||
"version": "0.15.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.15.0-beta.0.tgz",
|
||||
"integrity": "sha512-uzGINrBBsZattB4/ZYxdGNkTxNh3MqE6Y4nF762qo0zWWSiu+QNHQ+ZyLAZ2lwrEvwxs8LUaJNmnpn3nocHc1A==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -356,9 +356,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.14.2-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.14.2-beta.0.tgz",
|
||||
"integrity": "sha512-SewXZLGccZUkONACHHPCW1Z7xsz8MaXifwpaWMEyIzbQBFAIMq30lPZN63bTt/zNo6BcBPv54yz6n1ZfCv5l+w==",
|
||||
"version": "0.15.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.15.0-beta.0.tgz",
|
||||
"integrity": "sha512-bgphfea8h65vJ+bAL+vb+XEfmjskLZ+trZ3GN4n6SICU7XMGSFPl9xzPLGAj1WsoFCTJHe87DRYQpsWGlOI/LQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -369,9 +369,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-musl": {
|
||||
"version": "0.14.2-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.14.2-beta.0.tgz",
|
||||
"integrity": "sha512-ppq3P2QYxPHmikY6nbWTwMhDGP+e+feqzm4iXKhpBxzHR2XwoY5CtDKgKDfEHy1FyCoIyvh2yYT2M1TSkrkOBw==",
|
||||
"version": "0.15.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.15.0-beta.0.tgz",
|
||||
"integrity": "sha512-GpmVgqMS9ztNX53z8v0JdZiG6K1cK+mJnGZd3Gzguiavrly4mkYZ8IKNwWP9RmewUMNsFWR0IzD4VR+ojVpjlQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -382,9 +382,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.14.2-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.14.2-beta.0.tgz",
|
||||
"integrity": "sha512-XgkoarmdS42fLMMqNdHTVja2z7a0/Q4h3X+n14Ph/pkYsb7pmOabV4a7+ej8KJPm1wv2GmDA4GXcFPjF0tFBFA==",
|
||||
"version": "0.15.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.15.0-beta.0.tgz",
|
||||
"integrity": "sha512-6Y/39TDv4UDVWnl8UpUJ8mqv9rUNc9Q5VR510I7w34c0ChdWvjqdcy+JFnGrraamE1DA8E6wGEz+5oG0zprkNg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -395,9 +395,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-musl": {
|
||||
"version": "0.14.2-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.14.2-beta.0.tgz",
|
||||
"integrity": "sha512-vGgUOVb43eccF0oz2YJK+Zionwk4ODelHU7icmGeVsULkkFkoAbf0nO4PY38ZAeLsodnLxHIIu51Bd4Jm9m20w==",
|
||||
"version": "0.15.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.15.0-beta.0.tgz",
|
||||
"integrity": "sha512-GRdW2dhf6DmynhRojjtQjs8DeARM1WpbZZKXukeofOSMv6JoRBSWKw2DzW5sF/285IMU81B0OXZE75QjLp+VJg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -407,10 +407,23 @@
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-arm64-msvc": {
|
||||
"version": "0.15.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-arm64-msvc/-/vectordb-win32-arm64-msvc-0.15.0-beta.0.tgz",
|
||||
"integrity": "sha512-2EmRHuqqj8kC5ArUZztUWWTfNd774zL68btOlyhYL1CAiet5jIeGuFWJifdh+PXfQeLoa4GLW5LwyudIR4IHwA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.14.2-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.14.2-beta.0.tgz",
|
||||
"integrity": "sha512-zGLC382V3gE1MHQpf0XTe34yiB+6ZtSIuOFMIDEZVI5PVN5XkXULMY6dlt5fvo4IxhRoscGjpmmaNxJzUwigDg==",
|
||||
"version": "0.15.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.15.0-beta.0.tgz",
|
||||
"integrity": "sha512-lWq9b7LnWMGO0zDsp3rsLYyAzLooV7zQP77ph9Qv9fF0e4egD5l6SmMsAdQqLQnlhbQjkRjt3XRoDsqI809fcw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
||||
@@ -767,7 +767,9 @@ describe("When creating an index", () => {
|
||||
)
|
||||
.column("vec")
|
||||
.toArrow(),
|
||||
).rejects.toThrow(/.* query dim=64, expected vector dim=32.*/);
|
||||
).rejects.toThrow(
|
||||
/.* query dim\(64\) doesn't match the column vec vector dim\(32\).*/,
|
||||
);
|
||||
|
||||
const query64 = Array(64)
|
||||
.fill(1)
|
||||
|
||||
151
nodejs/package-lock.json
generated
151
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.14.2-beta.0",
|
||||
"version": "0.15.0-beta.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.14.2-beta.0",
|
||||
"version": "0.15.0-beta.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -18,7 +18,6 @@
|
||||
"win32"
|
||||
],
|
||||
"dependencies": {
|
||||
"@lancedb/lancedb": "^0.14.1",
|
||||
"reflect-metadata": "^0.2.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
@@ -4150,152 +4149,6 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.14"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/lancedb": {
|
||||
"version": "0.14.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/lancedb/-/lancedb-0.14.1.tgz",
|
||||
"integrity": "sha512-DfJ887t52n/2s8G1JnzE7gAR4i7UnfP1OjDYnJ4yTk0aIcn76CbVOUegYfURYlYjL+QFdI1MrAzUdMgYgsGGcA==",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"os": [
|
||||
"darwin",
|
||||
"linux",
|
||||
"win32"
|
||||
],
|
||||
"dependencies": {
|
||||
"reflect-metadata": "^0.2.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/lancedb-darwin-arm64": "0.14.1",
|
||||
"@lancedb/lancedb-darwin-x64": "0.14.1",
|
||||
"@lancedb/lancedb-linux-arm64-gnu": "0.14.1",
|
||||
"@lancedb/lancedb-linux-arm64-musl": "0.14.1",
|
||||
"@lancedb/lancedb-linux-x64-gnu": "0.14.1",
|
||||
"@lancedb/lancedb-linux-x64-musl": "0.14.1",
|
||||
"@lancedb/lancedb-win32-arm64-msvc": "0.14.1",
|
||||
"@lancedb/lancedb-win32-x64-msvc": "0.14.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"apache-arrow": ">=15.0.0 <=18.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/lancedb-darwin-arm64": {
|
||||
"version": "0.14.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-darwin-arm64/-/lancedb-darwin-arm64-0.14.1.tgz",
|
||||
"integrity": "sha512-eSWV3GydXfyaptPXZ+S3BgXY1YI26oHQDekACaVevRW6/YQD7sS9UhhSZn1mYyDtLTfJu2kOK2XHA9UY8nyuTg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/lancedb-darwin-x64": {
|
||||
"version": "0.14.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-darwin-x64/-/lancedb-darwin-x64-0.14.1.tgz",
|
||||
"integrity": "sha512-ecf50ykF9WCWmpwAjs3Mk2mph7d+rMJ9EVJeX0UJ4KHDC874lnTDo6Tfd9iUcbExtNI1KZbu+CFnYsbQU+R0gw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/lancedb-linux-arm64-gnu": {
|
||||
"version": "0.14.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-linux-arm64-gnu/-/lancedb-linux-arm64-gnu-0.14.1.tgz",
|
||||
"integrity": "sha512-X7ub1fOm7jZ19KFW/u3nDyFvj5XzDPqEVrp9mmcOgSrst3NJEGGBz1JypkLnTWpg/7IpCBs1UO1G7R7LEsHYOA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/lancedb-linux-arm64-musl": {
|
||||
"version": "0.14.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-linux-arm64-musl/-/lancedb-linux-arm64-musl-0.14.1.tgz",
|
||||
"integrity": "sha512-rkiWpsQCXwybwEjcdFXkAeGahiLcK/NQUjZc9WBY6CKk2Y9dICIafYzxZ6MDCY19jeJIgs3JS0mjleUWYr3JFw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/lancedb-linux-x64-gnu": {
|
||||
"version": "0.14.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-linux-x64-gnu/-/lancedb-linux-x64-gnu-0.14.1.tgz",
|
||||
"integrity": "sha512-LGp4D58pQJ3+H3GncNxWHkvhIVOKpTzYUBtVfC8he1rwZ6+CiYDyK9Sim/j8o3UJlJ7cP0m3gNUzPfQchQF9WA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/lancedb-linux-x64-musl": {
|
||||
"version": "0.14.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-linux-x64-musl/-/lancedb-linux-x64-musl-0.14.1.tgz",
|
||||
"integrity": "sha512-V/TeoyKUESPL/8L1z4WLbMFe5ZEv4gtxc0AFK8ghiduFYN/Hckuj4oTo/Y0ysLiBx1At9FCa91hWDB301ibHBg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/lancedb-win32-x64-msvc": {
|
||||
"version": "0.14.1",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-win32-x64-msvc/-/lancedb-win32-x64-msvc-0.14.1.tgz",
|
||||
"integrity": "sha512-4M8D0j8/3WZv4CKo+Z44sISKPCKWN5MWA0dcEEGw4sEXHF2RJLrMIOOgEpT5NF7VW+X4t2JJxUA6j2T3cXaD8w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/cli": {
|
||||
"version": "2.18.3",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/cli/-/cli-2.18.3.tgz",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.18.0-beta.0"
|
||||
current_version = "0.18.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.18.0-beta.0"
|
||||
version = "0.18.0"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -4,7 +4,7 @@ name = "lancedb"
|
||||
dynamic = ["version"]
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.21.1b1",
|
||||
"pylance==0.22.0",
|
||||
"tqdm>=4.27.0",
|
||||
"pydantic>=1.10",
|
||||
"packaging",
|
||||
@@ -53,7 +53,7 @@ tests = [
|
||||
"pytz",
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy",
|
||||
"pyarrow-stubs"
|
||||
"pyarrow-stubs",
|
||||
]
|
||||
dev = ["ruff", "pre-commit", "pyright"]
|
||||
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
|
||||
|
||||
@@ -59,7 +59,7 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
.create(name="voyage-3")
|
||||
|
||||
class TextModel(LanceModel):
|
||||
data: str = voyageai.SourceField()
|
||||
text: str = voyageai.SourceField()
|
||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||
|
||||
data = [ { "text": "hello world" },
|
||||
@@ -74,6 +74,14 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
|
||||
name: str
|
||||
client: ClassVar = None
|
||||
text_embedding_models: list = [
|
||||
"voyage-3",
|
||||
"voyage-3-lite",
|
||||
"voyage-finance-2",
|
||||
"voyage-law-2",
|
||||
"voyage-code-2",
|
||||
]
|
||||
multimodal_embedding_models: list = ["voyage-multimodal-3"]
|
||||
|
||||
def ndims(self):
|
||||
if self.name == "voyage-3-lite":
|
||||
@@ -115,13 +123,14 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
|
||||
|
||||
truncation: Optional[bool]
|
||||
"""
|
||||
if self.name in ["voyage-multimodal-3"]:
|
||||
rs = VoyageAIEmbeddingFunction._get_client().multimodal_embed(
|
||||
inputs=[[text]], model=self.name, **kwargs
|
||||
)
|
||||
client = VoyageAIEmbeddingFunction._get_client()
|
||||
if self.name in self.text_embedding_models:
|
||||
rs = client.embed(texts=[text], model=self.name, **kwargs)
|
||||
elif self.name in self.multimodal_embedding_models:
|
||||
rs = client.multimodal_embed(inputs=[[text]], model=self.name, **kwargs)
|
||||
else:
|
||||
rs = VoyageAIEmbeddingFunction._get_client().embed(
|
||||
texts=[text], model=self.name, **kwargs
|
||||
raise ValueError(
|
||||
f"Model {self.name} not supported to generate text embeddings"
|
||||
)
|
||||
|
||||
return rs.embeddings[0]
|
||||
|
||||
@@ -1741,12 +1741,14 @@ class AsyncQuery(AsyncQueryBase):
|
||||
a default `limit` of 10 will be used.
|
||||
|
||||
Typically, a single vector is passed in as the query. However, you can also
|
||||
pass in multiple vectors. This can be useful if you want to find the nearest
|
||||
vectors to multiple query vectors. This is not expected to be faster than
|
||||
making multiple queries concurrently; it is just a convenience method.
|
||||
If multiple vectors are passed in then an additional column `query_index`
|
||||
will be added to the results. This column will contain the index of the
|
||||
query vector that the result is nearest to.
|
||||
pass in multiple vectors. When multiple vectors are passed in, if the vector
|
||||
column is with multivector type, then the vectors will be treated as a single
|
||||
query. Or the vectors will be treated as multiple queries, this can be useful
|
||||
if you want to find the nearest vectors to multiple query vectors.
|
||||
This is not expected to be faster than making multiple queries concurrently;
|
||||
it is just a convenience method. If multiple vectors are passed in then
|
||||
an additional column `query_index` will be added to the results. This column
|
||||
will contain the index of the query vector that the result is nearest to.
|
||||
"""
|
||||
if query_vector is None:
|
||||
raise ValueError("query_vector can not be None")
|
||||
|
||||
@@ -2856,6 +2856,8 @@ class AsyncTable:
|
||||
async_query = async_query.with_row_id()
|
||||
|
||||
if query.vector:
|
||||
# we need the schema to get the vector column type
|
||||
# to determine whether the vectors is batch queries or not
|
||||
async_query = (
|
||||
async_query.nearest_to(query.vector)
|
||||
.distance_type(query.metric)
|
||||
|
||||
@@ -223,7 +223,7 @@ def inf_vector_column_query(schema: pa.Schema) -> str:
|
||||
vector_col_count = 0
|
||||
for field_name in schema.names:
|
||||
field = schema.field(field_name)
|
||||
if pa.types.is_fixed_size_list(field.type):
|
||||
if is_vector_column(field.type):
|
||||
vector_col_count += 1
|
||||
if vector_col_count > 1:
|
||||
raise ValueError(
|
||||
@@ -231,7 +231,6 @@ def inf_vector_column_query(schema: pa.Schema) -> str:
|
||||
"Please specify the vector column name "
|
||||
"for vector search"
|
||||
)
|
||||
break
|
||||
elif vector_col_count == 1:
|
||||
vector_col_name = field_name
|
||||
if vector_col_count == 0:
|
||||
@@ -242,6 +241,29 @@ def inf_vector_column_query(schema: pa.Schema) -> str:
|
||||
return vector_col_name
|
||||
|
||||
|
||||
def is_vector_column(data_type: pa.DataType) -> bool:
|
||||
"""
|
||||
Check if the column is a vector column.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data_type : pa.DataType
|
||||
The data type of the column.
|
||||
|
||||
Returns
|
||||
-------
|
||||
bool: True if the column is a vector column.
|
||||
"""
|
||||
if pa.types.is_fixed_size_list(data_type) and (
|
||||
pa.types.is_floating(data_type.value_type)
|
||||
or pa.types.is_uint8(data_type.value_type)
|
||||
):
|
||||
return True
|
||||
elif pa.types.is_list(data_type):
|
||||
return is_vector_column(data_type.value_type)
|
||||
return False
|
||||
|
||||
|
||||
def infer_vector_column_name(
|
||||
schema: pa.Schema,
|
||||
query_type: str,
|
||||
|
||||
@@ -68,6 +68,60 @@ async def table_struct_async(tmp_path) -> AsyncTable:
|
||||
return await conn.create_table("test_struct", table)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def multivec_table() -> lancedb.table.Table:
|
||||
db = lancedb.connect("memory://")
|
||||
# Generate 256 rows of data
|
||||
num_rows = 256
|
||||
|
||||
# Generate data for each column
|
||||
vector_data = [
|
||||
[[i, i + 1], [i + 2, i + 3]] for i in range(num_rows)
|
||||
] # Adjust to match nested structure
|
||||
id_data = list(range(1, num_rows + 1))
|
||||
float_field_data = [float(i) for i in range(1, num_rows + 1)]
|
||||
|
||||
# Create the Arrow table
|
||||
df = pa.table(
|
||||
{
|
||||
"vector": pa.array(
|
||||
vector_data, type=pa.list_(pa.list_(pa.float32(), list_size=2))
|
||||
),
|
||||
"id": pa.array(id_data),
|
||||
"float_field": pa.array(float_field_data),
|
||||
}
|
||||
)
|
||||
return db.create_table("test", df)
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def multivec_table_async(tmp_path) -> AsyncTable:
|
||||
conn = await lancedb.connect_async(
|
||||
"memory://", read_consistency_interval=timedelta(seconds=0)
|
||||
)
|
||||
# Generate 256 rows of data
|
||||
num_rows = 256
|
||||
|
||||
# Generate data for each column
|
||||
vector_data = [
|
||||
[[i, i + 1], [i + 2, i + 3]] for i in range(num_rows)
|
||||
] # Adjust to match nested structure
|
||||
id_data = list(range(1, num_rows + 1))
|
||||
float_field_data = [float(i) for i in range(1, num_rows + 1)]
|
||||
|
||||
# Create the Arrow table
|
||||
df = pa.table(
|
||||
{
|
||||
"vector": pa.array(
|
||||
vector_data, type=pa.list_(pa.list_(pa.float32(), list_size=2))
|
||||
),
|
||||
"id": pa.array(id_data),
|
||||
"float_field": pa.array(float_field_data),
|
||||
}
|
||||
)
|
||||
return await conn.create_table("test_async", df)
|
||||
|
||||
|
||||
def test_cast(table):
|
||||
class TestModel(LanceModel):
|
||||
vector: Vector(2)
|
||||
@@ -177,6 +231,62 @@ async def test_distance_range_async(table_async: AsyncTable):
|
||||
assert res["_distance"].to_pylist() == [min_dist, max_dist]
|
||||
|
||||
|
||||
def test_multivector(multivec_table: lancedb.table.Table):
|
||||
# create index on multivector
|
||||
multivec_table.create_index(
|
||||
metric="cosine",
|
||||
vector_column_name="vector",
|
||||
index_type="IVF_PQ",
|
||||
num_partitions=1,
|
||||
num_sub_vectors=2,
|
||||
)
|
||||
|
||||
# query with single vector
|
||||
q = [1, 2]
|
||||
rs = multivec_table.search(q).to_arrow()
|
||||
|
||||
# query with multiple vectors
|
||||
q = [[1, 2], [1, 2]]
|
||||
rs2 = multivec_table.search(q).to_arrow()
|
||||
assert len(rs2) == len(rs)
|
||||
for i in range(2):
|
||||
assert rs2["_distance"][i].as_py() == rs["_distance"][i].as_py() * 2
|
||||
|
||||
# can't query with vector that dim not matched
|
||||
with pytest.raises(Exception):
|
||||
multivec_table.search([1, 2, 3]).to_arrow()
|
||||
# can't query with vector list that some dim not matched
|
||||
with pytest.raises(Exception):
|
||||
multivec_table.search([[1, 2], [1, 2, 3]]).to_arrow()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_multivector_async(multivec_table_async: AsyncTable):
|
||||
# create index on multivector
|
||||
await multivec_table_async.create_index(
|
||||
"vector",
|
||||
config=IvfPq(distance_type="cosine", num_partitions=1, num_sub_vectors=2),
|
||||
)
|
||||
|
||||
# query with single vector
|
||||
q = [1, 2]
|
||||
rs = await multivec_table_async.query().nearest_to(q).to_arrow()
|
||||
|
||||
# query with multiple vectors
|
||||
q = [[1, 2], [1, 2]]
|
||||
rs2 = await multivec_table_async.query().nearest_to(q).to_arrow()
|
||||
assert len(rs2) == len(rs)
|
||||
for i in range(2):
|
||||
assert rs2["_distance"][i].as_py() == rs["_distance"][i].as_py() * 2
|
||||
|
||||
# can't query with vector that dim not matched
|
||||
with pytest.raises(Exception):
|
||||
await multivec_table_async.query().nearest_to([1, 2, 3]).to_arrow()
|
||||
# can't query with vector list that some dim not matched
|
||||
with pytest.raises(Exception):
|
||||
await multivec_table_async.query().nearest_to([[1, 2], [1, 2, 3]]).to_arrow()
|
||||
|
||||
|
||||
def test_vector_query_with_no_limit(table):
|
||||
with pytest.raises(ValueError):
|
||||
LanceVectorQueryBuilder(table, [0, 0], "vector").limit(0).select(
|
||||
@@ -448,11 +558,13 @@ async def test_query_to_pandas_flatten_async(table_struct_async: AsyncTable):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_to_polars_async(table_async: AsyncTable):
|
||||
schema = await table_async.schema()
|
||||
num_columns = len(schema.names)
|
||||
df = await table_async.query().to_polars()
|
||||
assert df.shape == (2, 5)
|
||||
assert df.shape == (2, num_columns)
|
||||
|
||||
df = await table_async.query().where("id < 0").to_polars()
|
||||
assert df.shape == (0, 5)
|
||||
assert df.shape == (0, num_columns)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -52,7 +52,7 @@ fn create_some_records() -> Result<Box<dyn RecordBatchReader + Send>> {
|
||||
.iter()
|
||||
.step_by(1024)
|
||||
.take(500)
|
||||
.map(|w| *w)
|
||||
.copied()
|
||||
.collect::<Vec<_>>();
|
||||
let n_terms = 3;
|
||||
let batches = RecordBatchIterator::new(
|
||||
@@ -95,7 +95,7 @@ async fn search_index(table: &Table) -> Result<()> {
|
||||
.iter()
|
||||
.step_by(1024)
|
||||
.take(500)
|
||||
.map(|w| *w)
|
||||
.copied()
|
||||
.collect::<Vec<_>>();
|
||||
let query = words[0].to_owned();
|
||||
println!("Searching for: {}", query);
|
||||
|
||||
@@ -18,7 +18,7 @@ use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::AsArray;
|
||||
use arrow::array::{AsArray, FixedSizeListBuilder, Float32Builder};
|
||||
use arrow::datatypes::{Float32Type, UInt8Type};
|
||||
use arrow_array::{RecordBatchIterator, RecordBatchReader};
|
||||
use arrow_schema::{DataType, Field, Schema, SchemaRef};
|
||||
@@ -1902,68 +1902,74 @@ impl TableInternal for NativeTable {
|
||||
options: QueryExecutionOptions,
|
||||
) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
let ds_ref = self.dataset.get().await?;
|
||||
let mut column = query.column.clone();
|
||||
let schema = ds_ref.schema();
|
||||
|
||||
let mut query_vector = query.query_vector.first().cloned();
|
||||
if query.query_vector.len() > 1 {
|
||||
// If there are multiple query vectors, create a plan for each of them and union them.
|
||||
let query_vecs = query.query_vector.clone();
|
||||
let plan_futures = query_vecs
|
||||
.into_iter()
|
||||
.map(|query_vector| {
|
||||
let mut sub_query = query.clone();
|
||||
sub_query.query_vector = vec![query_vector];
|
||||
let options_ref = options.clone();
|
||||
async move { self.create_plan(&sub_query, options_ref).await }
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let plans = futures::future::try_join_all(plan_futures).await?;
|
||||
return Table::multi_vector_plan(plans);
|
||||
if column.is_none() {
|
||||
// Infer a vector column with the same dimension of the query vector.
|
||||
let arrow_schema = Schema::from(ds_ref.schema());
|
||||
column = Some(default_vector_column(
|
||||
&arrow_schema,
|
||||
Some(query.query_vector[0].len() as i32),
|
||||
)?);
|
||||
}
|
||||
let vector_field = schema.field(column.as_ref().unwrap()).unwrap();
|
||||
if let DataType::List(_) = vector_field.data_type() {
|
||||
// it's multivector, then the vectors should be treated as single query
|
||||
// concatenate the vectors into a FixedSizeList<FixedSizeList<_>>
|
||||
// it's also possible to concatenate the vectors into a List<FixedSizeList<_>>,
|
||||
// but FixedSizeList is more efficient and easier to construct
|
||||
let vectors = query
|
||||
.query_vector
|
||||
.iter()
|
||||
.map(|arr| arr.as_ref())
|
||||
.collect::<Vec<_>>();
|
||||
let dim = vectors[0].len();
|
||||
let mut fsl_builder = FixedSizeListBuilder::with_capacity(
|
||||
Float32Builder::with_capacity(dim),
|
||||
dim as i32,
|
||||
vectors.len(),
|
||||
);
|
||||
for vec in vectors {
|
||||
fsl_builder
|
||||
.values()
|
||||
.append_slice(vec.as_primitive::<Float32Type>().values());
|
||||
fsl_builder.append(true);
|
||||
}
|
||||
query_vector = Some(Arc::new(fsl_builder.finish()));
|
||||
} else {
|
||||
// If there are multiple query vectors, create a plan for each of them and union them.
|
||||
let query_vecs = query.query_vector.clone();
|
||||
let plan_futures = query_vecs
|
||||
.into_iter()
|
||||
.map(|query_vector| {
|
||||
let mut sub_query = query.clone();
|
||||
sub_query.query_vector = vec![query_vector];
|
||||
let options_ref = options.clone();
|
||||
async move { self.create_plan(&sub_query, options_ref).await }
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let plans = futures::future::try_join_all(plan_futures).await?;
|
||||
return Table::multi_vector_plan(plans);
|
||||
}
|
||||
}
|
||||
|
||||
let mut scanner: Scanner = ds_ref.scan();
|
||||
|
||||
if let Some(query_vector) = query.query_vector.first() {
|
||||
if let Some(query_vector) = query_vector {
|
||||
// If there is a vector query, default to limit=10 if unspecified
|
||||
let column = if let Some(col) = query.column.as_ref() {
|
||||
col.clone()
|
||||
let column = if let Some(col) = column {
|
||||
col
|
||||
} else {
|
||||
// Infer a vector column with the same dimension of the query vector.
|
||||
let arrow_schema = Schema::from(ds_ref.schema());
|
||||
default_vector_column(&arrow_schema, Some(query_vector.len() as i32))?
|
||||
};
|
||||
|
||||
let field = ds_ref.schema().field(&column).ok_or(Error::Schema {
|
||||
message: format!("Column {} not found in dataset schema", column),
|
||||
})?;
|
||||
|
||||
let mut is_binary = false;
|
||||
if let arrow_schema::DataType::FixedSizeList(element, dim) = field.data_type() {
|
||||
match element.data_type() {
|
||||
e_type if e_type.is_floating() => {}
|
||||
e_type if *e_type == DataType::UInt8 => {
|
||||
is_binary = true;
|
||||
}
|
||||
_ => {
|
||||
return Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"The data type of the vector column '{}' is not a floating point type",
|
||||
column
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
if dim != query_vector.len() as i32 {
|
||||
return Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"The dimension of the query vector does not match with the dimension of the vector column '{}': \
|
||||
query dim={}, expected vector dim={}",
|
||||
column,
|
||||
query_vector.len(),
|
||||
dim,
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let (_, element_type) = lance::index::vector::utils::get_vector_type(schema, &column)?;
|
||||
let is_binary = matches!(element_type, DataType::UInt8);
|
||||
if is_binary {
|
||||
let query_vector = arrow::compute::cast(&query_vector, &DataType::UInt8)?;
|
||||
let query_vector = query_vector.as_primitive::<UInt8Type>();
|
||||
@@ -1973,10 +1979,9 @@ impl TableInternal for NativeTable {
|
||||
query.base.limit.unwrap_or(DEFAULT_TOP_K),
|
||||
)?;
|
||||
} else {
|
||||
let query_vector = query_vector.as_primitive::<Float32Type>();
|
||||
scanner.nearest(
|
||||
&column,
|
||||
query_vector,
|
||||
query_vector.as_ref(),
|
||||
query.base.limit.unwrap_or(DEFAULT_TOP_K),
|
||||
)?;
|
||||
}
|
||||
|
||||
@@ -108,13 +108,8 @@ pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result
|
||||
let candidates = schema
|
||||
.fields()
|
||||
.iter()
|
||||
.filter_map(|field| match field.data_type() {
|
||||
arrow_schema::DataType::FixedSizeList(f, d)
|
||||
if (f.data_type().is_floating() || f.data_type() == &DataType::UInt8)
|
||||
&& dim.map(|expect| *d == expect).unwrap_or(true) =>
|
||||
{
|
||||
Some(field.name())
|
||||
}
|
||||
.filter_map(|field| match inf_vector_dim(field) {
|
||||
Some(d) if dim.is_none() || dim == Some(d) => Some(field.name()),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
@@ -138,6 +133,20 @@ pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result
|
||||
}
|
||||
}
|
||||
|
||||
fn inf_vector_dim(field: &arrow_schema::Field) -> Option<i32> {
|
||||
match field.data_type() {
|
||||
arrow_schema::DataType::FixedSizeList(f, d) => {
|
||||
if f.data_type().is_floating() || f.data_type() == &DataType::UInt8 {
|
||||
Some(*d)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
arrow_schema::DataType::List(f) => inf_vector_dim(f),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn supported_btree_data_type(dtype: &DataType) -> bool {
|
||||
dtype.is_integer()
|
||||
|| dtype.is_floating()
|
||||
@@ -171,9 +180,10 @@ pub fn supported_fts_data_type(dtype: &DataType) -> bool {
|
||||
|
||||
pub fn supported_vector_data_type(dtype: &DataType) -> bool {
|
||||
match dtype {
|
||||
DataType::FixedSizeList(inner, _) => {
|
||||
DataType::is_floating(inner.data_type()) || *inner.data_type() == DataType::UInt8
|
||||
DataType::FixedSizeList(field, _) => {
|
||||
field.data_type().is_floating() || field.data_type() == &DataType::UInt8
|
||||
}
|
||||
DataType::List(field) => supported_vector_data_type(field.data_type()),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user