mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 23:12:58 +00:00
Compare commits
9 Commits
python-v0.
...
2493-where
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
16a7e29639 | ||
|
|
0e7a218d62 | ||
|
|
167fccc427 | ||
|
|
2bffbcefa5 | ||
|
|
905552f993 | ||
|
|
e4898c9313 | ||
|
|
cab36d94b2 | ||
|
|
b64252d4fd | ||
|
|
6fc006072c |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.21.0"
|
||||
current_version = "0.21.1-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
8
Cargo.lock
generated
8
Cargo.lock
generated
@@ -4314,7 +4314,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb"
|
||||
version = "0.21.0"
|
||||
version = "0.21.1-beta.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4401,7 +4401,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-node"
|
||||
version = "0.21.0"
|
||||
version = "0.21.1-beta.1"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-ipc",
|
||||
@@ -4426,7 +4426,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-nodejs"
|
||||
version = "0.21.0"
|
||||
version = "0.21.1-beta.1"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-ipc",
|
||||
@@ -4446,7 +4446,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-python"
|
||||
version = "0.24.0"
|
||||
version = "0.24.1-beta.1"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"env_logger",
|
||||
|
||||
@@ -46,7 +46,7 @@ datafusion-execution = "48.0"
|
||||
datafusion-expr = "48.0"
|
||||
datafusion-physical-plan = "48.0"
|
||||
env_logger = "0.11"
|
||||
half = { "version" = "=2.6.0", default-features = false, features = [
|
||||
half = { "version" = "2.6.0", default-features = false, features = [
|
||||
"num-traits",
|
||||
] }
|
||||
futures = "0"
|
||||
|
||||
@@ -71,6 +71,45 @@ with merge insert, enable both `when_matched_update_all()` and
|
||||
If a column is nullable, it can be omitted from input data and it will be
|
||||
considered `null`. Columns can also be provided in any order.
|
||||
|
||||
### Conditional Updates
|
||||
|
||||
You can add a `where` clause to `when_matched_update_all()` to only update rows
|
||||
that meet certain conditions. When using the `where` parameter, you must prefix
|
||||
column names with either `source.` (for the new data) or `target.` (for the
|
||||
existing data) to specify which table you're referencing.
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
# Only update rows where the target's status is 'active'
|
||||
table.merge_insert("id")
|
||||
.when_matched_update_all(where="target.status = 'active'")
|
||||
.when_not_matched_insert_all()
|
||||
.execute(new_data)
|
||||
|
||||
# Only update if the new price is higher than the existing price
|
||||
table.merge_insert("product_id")
|
||||
.when_matched_update_all(where="source.price > target.price")
|
||||
.when_not_matched_insert_all()
|
||||
.execute(new_data)
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
|
||||
```typescript
|
||||
// Only update rows where the target's status is 'active'
|
||||
await table.mergeInsert("id")
|
||||
.whenMatchedUpdateAll({ where: "target.status = 'active'" })
|
||||
.whenNotMatchedInsertAll()
|
||||
.execute(newData);
|
||||
|
||||
// Only update if the new price is higher than the existing price
|
||||
await table.mergeInsert("product_id")
|
||||
.whenMatchedUpdateAll({ where: "source.price > target.price" })
|
||||
.whenNotMatchedInsertAll()
|
||||
.execute(newData);
|
||||
```
|
||||
|
||||
## Insert-if-not-exists
|
||||
|
||||
To avoid inserting duplicate rows, you can use the insert-if-not-exists command.
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.21.0-final.0</version>
|
||||
<version>0.21.1-beta.1</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.21.0-final.0</version>
|
||||
<version>0.21.1-beta.1</version>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<name>LanceDB Parent</name>
|
||||
|
||||
74
node/package-lock.json
generated
74
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -52,11 +52,11 @@
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.21.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.21.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.21.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.21.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.21.0"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.21.1-beta.1",
|
||||
"@lancedb/vectordb-darwin-x64": "0.21.1-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.21.1-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.21.1-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.21.1-beta.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@apache-arrow/ts": "^14.0.2",
|
||||
@@ -326,66 +326,6 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.21.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.0.tgz",
|
||||
"integrity": "sha512-FTKbdYG36mvQ75tId+esyRfRjIBzryRhAp/6h51tiXy8gsq/TButuiPdqIXeonNModEjhu8wkzsGFwgjCcePow==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.21.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.0.tgz",
|
||||
"integrity": "sha512-vGaFBr2sQZWE0mudg3LGTHiRE7p2Qce2ogiE2VAf1DLAJ4MrIhgVmEttf966ausIwNCgml+5AzUntw6zC0Oyuw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.21.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.0.tgz",
|
||||
"integrity": "sha512-KlxqhnX4eBN6rDqrPgf/x/vLpnHK2UcIzNLpiOZzSAhooCmKmnNpfs/EXt+KRFloEQMy25AHpMpqkSPv1Q2oDA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.21.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.0.tgz",
|
||||
"integrity": "sha512-t7dkFV6kga3rqXR1rH460GdpSVuY0tw7CIc0KqsIIkBcXzUPA1n0QDoazdwPQ1MXzG/+F5WWCTp3dYWx2vP0Lw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.21.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.0.tgz",
|
||||
"integrity": "sha512-yovkW61RECBTsu0S527BX1uW0jCAZK9MAsJTknXmDjp78figx4/AyI5ajT63u/Uo4EKoheeNiiLdyU4v+A9YVw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@neon-rs/cli": {
|
||||
"version": "0.0.160",
|
||||
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"private": false,
|
||||
"main": "dist/index.js",
|
||||
@@ -89,10 +89,10 @@
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-x64": "0.21.0",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.21.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.21.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.21.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.21.0"
|
||||
"@lancedb/vectordb-darwin-x64": "0.21.1-beta.1",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.21.1-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.21.1-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.21.1-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.21.1-beta.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.21.0"
|
||||
version = "0.21.1-beta.1"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.21.0",
|
||||
"version": "0.21.1-beta.1",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.24.1-beta.0"
|
||||
current_version = "0.24.1-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.24.1-beta.0"
|
||||
version = "0.24.1-beta.1"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -45,6 +45,16 @@ class LanceMergeInsertBuilder(object):
|
||||
If there are multiple matches then the behavior is undefined.
|
||||
Currently this causes multiple copies of the row to be created
|
||||
but that behavior is subject to change.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
where : Optional[str], default None
|
||||
A SQL filter expression to apply to matched rows. The filter must
|
||||
specify whether you are referencing the source table (new data) or
|
||||
the target table (existing data) by prefixing column names with
|
||||
"source." or "target." respectively.
|
||||
|
||||
Example: "target.status = 'active'" or "source.price > target.price"
|
||||
"""
|
||||
self._when_matched_update_all = True
|
||||
self._when_matched_update_all_condition = where
|
||||
|
||||
@@ -18,7 +18,7 @@ from lancedb._lancedb import (
|
||||
UpdateResult,
|
||||
)
|
||||
from lancedb.embeddings.base import EmbeddingFunctionConfig
|
||||
from lancedb.index import FTS, BTree, Bitmap, HnswPq, HnswSq, IvfFlat, IvfPq, LabelList
|
||||
from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, LabelList
|
||||
from lancedb.remote.db import LOOP
|
||||
import pyarrow as pa
|
||||
|
||||
@@ -89,7 +89,7 @@ class RemoteTable(Table):
|
||||
|
||||
def to_pandas(self):
|
||||
"""to_pandas() is not yet supported on LanceDB cloud."""
|
||||
return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
|
||||
raise NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
|
||||
|
||||
def checkout(self, version: Union[int, str]):
|
||||
return LOOP.run(self._table.checkout(version))
|
||||
@@ -186,6 +186,8 @@ class RemoteTable(Table):
|
||||
accelerator: Optional[str] = None,
|
||||
index_type="vector",
|
||||
wait_timeout: Optional[timedelta] = None,
|
||||
*,
|
||||
num_bits: int = 8,
|
||||
):
|
||||
"""Create an index on the table.
|
||||
Currently, the only parameters that matter are
|
||||
@@ -220,11 +222,6 @@ class RemoteTable(Table):
|
||||
>>> table.create_index("l2", "vector") # doctest: +SKIP
|
||||
"""
|
||||
|
||||
if num_partitions is not None:
|
||||
logging.warning(
|
||||
"num_partitions is not supported on LanceDB cloud."
|
||||
"This parameter will be tuned automatically."
|
||||
)
|
||||
if num_sub_vectors is not None:
|
||||
logging.warning(
|
||||
"num_sub_vectors is not supported on LanceDB cloud."
|
||||
@@ -244,13 +241,21 @@ class RemoteTable(Table):
|
||||
|
||||
index_type = index_type.upper()
|
||||
if index_type == "VECTOR" or index_type == "IVF_PQ":
|
||||
config = IvfPq(distance_type=metric)
|
||||
config = IvfPq(
|
||||
distance_type=metric,
|
||||
num_partitions=num_partitions,
|
||||
num_sub_vectors=num_sub_vectors,
|
||||
num_bits=num_bits,
|
||||
)
|
||||
elif index_type == "IVF_HNSW_PQ":
|
||||
config = HnswPq(distance_type=metric)
|
||||
raise ValueError(
|
||||
"IVF_HNSW_PQ is not supported on LanceDB cloud."
|
||||
"Please use IVF_HNSW_SQ instead."
|
||||
)
|
||||
elif index_type == "IVF_HNSW_SQ":
|
||||
config = HnswSq(distance_type=metric)
|
||||
config = HnswSq(distance_type=metric, num_partitions=num_partitions)
|
||||
elif index_type == "IVF_FLAT":
|
||||
config = IvfFlat(distance_type=metric)
|
||||
config = IvfFlat(distance_type=metric, num_partitions=num_partitions)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unknown vector index type: {index_type}. Valid options are"
|
||||
|
||||
@@ -210,6 +210,25 @@ async def test_retry_error():
|
||||
assert cause.status_code == 429
|
||||
|
||||
|
||||
def test_table_unimplemented_functions():
|
||||
def handler(request):
|
||||
if request.path == "/v1/table/test/create/?mode=create":
|
||||
request.send_response(200)
|
||||
request.send_header("Content-Type", "application/json")
|
||||
request.end_headers()
|
||||
request.wfile.write(b"{}")
|
||||
else:
|
||||
request.send_response(404)
|
||||
request.end_headers()
|
||||
|
||||
with mock_lancedb_connection(handler) as db:
|
||||
table = db.create_table("test", [{"id": 1}])
|
||||
with pytest.raises(NotImplementedError):
|
||||
table.to_arrow()
|
||||
with pytest.raises(NotImplementedError):
|
||||
table.to_pandas()
|
||||
|
||||
|
||||
def test_table_add_in_threadpool():
|
||||
def handler(request):
|
||||
if request.path == "/v1/table/test/insert/":
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-node"
|
||||
version = "0.21.0"
|
||||
version = "0.21.1-beta.1"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
edition.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.21.0"
|
||||
version = "0.21.1-beta.1"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -57,6 +57,8 @@ use crate::{
|
||||
};
|
||||
|
||||
const REQUEST_TIMEOUT_HEADER: HeaderName = HeaderName::from_static("x-request-timeout-ms");
|
||||
const METRIC_TYPE_KEY: &str = "metric_type";
|
||||
const INDEX_TYPE_KEY: &str = "index_type";
|
||||
|
||||
pub struct RemoteTags<'a, S: HttpSend = Sender> {
|
||||
inner: &'a RemoteTable<S>,
|
||||
@@ -997,23 +999,53 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
"column": column
|
||||
});
|
||||
|
||||
let (index_type, distance_type) = match index.index {
|
||||
match index.index {
|
||||
// TODO: Should we pass the actual index parameters? SaaS does not
|
||||
// yet support them.
|
||||
Index::IvfFlat(index) => ("IVF_FLAT", Some(index.distance_type)),
|
||||
Index::IvfPq(index) => ("IVF_PQ", Some(index.distance_type)),
|
||||
Index::IvfHnswSq(index) => ("IVF_HNSW_SQ", Some(index.distance_type)),
|
||||
Index::BTree(_) => ("BTREE", None),
|
||||
Index::Bitmap(_) => ("BITMAP", None),
|
||||
Index::LabelList(_) => ("LABEL_LIST", None),
|
||||
Index::IvfFlat(index) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_FLAT".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
||||
if let Some(num_partitions) = index.num_partitions {
|
||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
||||
}
|
||||
}
|
||||
Index::IvfPq(index) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
||||
if let Some(num_partitions) = index.num_partitions {
|
||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
||||
}
|
||||
if let Some(num_bits) = index.num_bits {
|
||||
body["num_bits"] = serde_json::Value::Number(num_bits.into());
|
||||
}
|
||||
}
|
||||
Index::IvfHnswSq(index) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_HNSW_SQ".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
||||
if let Some(num_partitions) = index.num_partitions {
|
||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
||||
}
|
||||
}
|
||||
Index::BTree(_) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
|
||||
}
|
||||
Index::Bitmap(_) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("BITMAP".to_string());
|
||||
}
|
||||
Index::LabelList(_) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("LABEL_LIST".to_string());
|
||||
}
|
||||
Index::FTS(fts) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("FTS".to_string());
|
||||
let params = serde_json::to_value(&fts).map_err(|e| Error::InvalidInput {
|
||||
message: format!("failed to serialize FTS index params {:?}", e),
|
||||
})?;
|
||||
for (key, value) in params.as_object().unwrap() {
|
||||
body[key] = value.clone();
|
||||
}
|
||||
("FTS", None)
|
||||
}
|
||||
Index::Auto => {
|
||||
let schema = self.schema().await?;
|
||||
@@ -1023,9 +1055,11 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
message: format!("Column {} not found in schema", column),
|
||||
})?;
|
||||
if supported_vector_data_type(field.data_type()) {
|
||||
("IVF_PQ", Some(DistanceType::L2))
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(DistanceType::L2.to_string().to_lowercase());
|
||||
} else if supported_btree_data_type(field.data_type()) {
|
||||
("BTREE", None)
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
|
||||
} else {
|
||||
return Err(Error::NotSupported {
|
||||
message: format!(
|
||||
@@ -1042,12 +1076,6 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
})
|
||||
}
|
||||
};
|
||||
body["index_type"] = serde_json::Value::String(index_type.into());
|
||||
if let Some(distance_type) = distance_type {
|
||||
// Phalanx expects this to be lowercase right now.
|
||||
body["metric_type"] =
|
||||
serde_json::Value::String(distance_type.to_string().to_lowercase());
|
||||
}
|
||||
|
||||
let request = request.json(&body);
|
||||
|
||||
@@ -1429,11 +1457,12 @@ mod tests {
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::{future::BoxFuture, StreamExt, TryFutureExt};
|
||||
use lance_index::scalar::inverted::query::MatchQuery;
|
||||
use lance_index::scalar::FullTextSearchQuery;
|
||||
use lance_index::scalar::{FullTextSearchQuery, InvertedIndexParams};
|
||||
use reqwest::Body;
|
||||
use rstest::rstest;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::index::vector::IvfFlatIndexBuilder;
|
||||
use crate::index::vector::{IvfFlatIndexBuilder, IvfHnswSqIndexBuilder};
|
||||
use crate::remote::db::DEFAULT_SERVER_VERSION;
|
||||
use crate::remote::JSON_CONTENT_TYPE;
|
||||
use crate::{
|
||||
@@ -2433,29 +2462,79 @@ mod tests {
|
||||
let cases = [
|
||||
(
|
||||
"IVF_FLAT",
|
||||
Some("hamming"),
|
||||
json!({
|
||||
"metric_type": "hamming",
|
||||
}),
|
||||
Index::IvfFlat(IvfFlatIndexBuilder::default().distance_type(DistanceType::Hamming)),
|
||||
),
|
||||
("IVF_PQ", Some("l2"), Index::IvfPq(Default::default())),
|
||||
(
|
||||
"IVF_FLAT",
|
||||
json!({
|
||||
"metric_type": "hamming",
|
||||
"num_partitions": 128,
|
||||
}),
|
||||
Index::IvfFlat(
|
||||
IvfFlatIndexBuilder::default()
|
||||
.distance_type(DistanceType::Hamming)
|
||||
.num_partitions(128),
|
||||
),
|
||||
),
|
||||
(
|
||||
"IVF_PQ",
|
||||
Some("cosine"),
|
||||
Index::IvfPq(IvfPqIndexBuilder::default().distance_type(DistanceType::Cosine)),
|
||||
json!({
|
||||
"metric_type": "l2",
|
||||
}),
|
||||
Index::IvfPq(Default::default()),
|
||||
),
|
||||
(
|
||||
"IVF_PQ",
|
||||
json!({
|
||||
"metric_type": "cosine",
|
||||
"num_partitions": 128,
|
||||
"num_bits": 4,
|
||||
}),
|
||||
Index::IvfPq(
|
||||
IvfPqIndexBuilder::default()
|
||||
.distance_type(DistanceType::Cosine)
|
||||
.num_partitions(128)
|
||||
.num_bits(4),
|
||||
),
|
||||
),
|
||||
(
|
||||
"IVF_HNSW_SQ",
|
||||
Some("l2"),
|
||||
json!({
|
||||
"metric_type": "l2",
|
||||
}),
|
||||
Index::IvfHnswSq(Default::default()),
|
||||
),
|
||||
(
|
||||
"IVF_HNSW_SQ",
|
||||
json!({
|
||||
"metric_type": "l2",
|
||||
"num_partitions": 128,
|
||||
}),
|
||||
Index::IvfHnswSq(
|
||||
IvfHnswSqIndexBuilder::default()
|
||||
.distance_type(DistanceType::L2)
|
||||
.num_partitions(128),
|
||||
),
|
||||
),
|
||||
// HNSW_PQ isn't yet supported on SaaS
|
||||
("BTREE", None, Index::BTree(Default::default())),
|
||||
("BITMAP", None, Index::Bitmap(Default::default())),
|
||||
("LABEL_LIST", None, Index::LabelList(Default::default())),
|
||||
("FTS", None, Index::FTS(Default::default())),
|
||||
("BTREE", json!({}), Index::BTree(Default::default())),
|
||||
("BITMAP", json!({}), Index::Bitmap(Default::default())),
|
||||
(
|
||||
"LABEL_LIST",
|
||||
json!({}),
|
||||
Index::LabelList(Default::default()),
|
||||
),
|
||||
(
|
||||
"FTS",
|
||||
serde_json::to_value(InvertedIndexParams::default()).unwrap(),
|
||||
Index::FTS(Default::default()),
|
||||
),
|
||||
];
|
||||
|
||||
for (index_type, distance_type, index) in cases {
|
||||
let params = index.clone();
|
||||
for (index_type, expected_body, index) in cases {
|
||||
let table = Table::new_with_handler("my_table", move |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(request.url().path(), "/v1/table/my_table/create_index/");
|
||||
@@ -2465,19 +2544,9 @@ mod tests {
|
||||
);
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
let mut expected_body = serde_json::json!({
|
||||
"column": "a",
|
||||
"index_type": index_type,
|
||||
});
|
||||
if let Some(distance_type) = distance_type {
|
||||
expected_body["metric_type"] = distance_type.to_lowercase().into();
|
||||
}
|
||||
if let Index::FTS(fts) = ¶ms {
|
||||
let params = serde_json::to_value(fts).unwrap();
|
||||
for (key, value) in params.as_object().unwrap() {
|
||||
expected_body[key] = value.clone();
|
||||
}
|
||||
}
|
||||
let mut expected_body = expected_body.clone();
|
||||
expected_body["column"] = "a".into();
|
||||
expected_body[INDEX_TYPE_KEY] = index_type.into();
|
||||
|
||||
assert_eq!(body, expected_body);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user