mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 15:12:53 +00:00
Compare commits
18 Commits
python-v0.
...
remote-ver
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b7fed59278 | ||
|
|
60ad82b6ad | ||
|
|
134258308c | ||
|
|
d36334d565 | ||
|
|
131c01d702 | ||
|
|
b2f88f0b29 | ||
|
|
f2e3989831 | ||
|
|
83ae52938a | ||
|
|
267aa83bf8 | ||
|
|
cc72050206 | ||
|
|
72543c8b9d | ||
|
|
97d6210c33 | ||
|
|
a3d0c27b0a | ||
|
|
b23d8abcdd | ||
|
|
e3ea5cf9b9 | ||
|
|
4f8b086175 | ||
|
|
72330fb759 | ||
|
|
e3b2c5f438 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.13.0-beta.2"
|
current_version = "0.13.0"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
4
.github/workflows/npm-publish.yml
vendored
4
.github/workflows/npm-publish.yml
vendored
@@ -460,7 +460,7 @@ jobs:
|
|||||||
|
|
||||||
release:
|
release:
|
||||||
name: vectordb NPM Publish
|
name: vectordb NPM Publish
|
||||||
needs: [node, node-macos, node-linux, node-windows, node-windows-arm64]
|
needs: [node, node-macos, node-linux, node-windows]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -500,7 +500,7 @@ jobs:
|
|||||||
|
|
||||||
release-nodejs:
|
release-nodejs:
|
||||||
name: lancedb NPM Publish
|
name: lancedb NPM Publish
|
||||||
needs: [nodejs-macos, nodejs-linux, nodejs-windows, nodejs-windows-arm64]
|
needs: [nodejs-macos, nodejs-linux, nodejs-windows]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
|
|||||||
18
Cargo.toml
18
Cargo.toml
@@ -18,18 +18,18 @@ repository = "https://github.com/lancedb/lancedb"
|
|||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
keywords = ["lancedb", "lance", "database", "vector", "search"]
|
keywords = ["lancedb", "lance", "database", "vector", "search"]
|
||||||
categories = ["database-implementations"]
|
categories = ["database-implementations"]
|
||||||
rust-version = "1.80.0" # TODO: lower this once we upgrade Lance again.
|
rust-version = "1.80.0" # TODO: lower this once we upgrade Lance again.
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.19.2", "features" = [
|
lance = { "version" = "=0.19.3", "features" = [
|
||||||
"dynamodb",
|
"dynamodb",
|
||||||
]}
|
], git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
|
||||||
lance-index = "=0.19.2"
|
lance-index = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
|
||||||
lance-linalg = "=0.19.2"
|
lance-linalg = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
|
||||||
lance-table = "=0.19.2"
|
lance-table = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
|
||||||
lance-testing = "=0.19.2"
|
lance-testing = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
|
||||||
lance-datafusion = "=0.19.2"
|
lance-datafusion = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
|
||||||
lance-encoding = "=0.19.2"
|
lance-encoding = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "52.2", optional = false }
|
arrow = { version = "52.2", optional = false }
|
||||||
arrow-array = "52.2"
|
arrow-array = "52.2"
|
||||||
|
|||||||
21
docs/package-lock.json
generated
21
docs/package-lock.json
generated
@@ -19,7 +19,7 @@
|
|||||||
},
|
},
|
||||||
"../node": {
|
"../node": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.4.6",
|
"version": "0.12.0",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -31,9 +31,7 @@
|
|||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
|
||||||
"@neon-rs/load": "^0.0.74",
|
"@neon-rs/load": "^0.0.74",
|
||||||
"apache-arrow": "^14.0.2",
|
|
||||||
"axios": "^1.4.0"
|
"axios": "^1.4.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
@@ -46,6 +44,7 @@
|
|||||||
"@types/temp": "^0.9.1",
|
"@types/temp": "^0.9.1",
|
||||||
"@types/uuid": "^9.0.3",
|
"@types/uuid": "^9.0.3",
|
||||||
"@typescript-eslint/eslint-plugin": "^5.59.1",
|
"@typescript-eslint/eslint-plugin": "^5.59.1",
|
||||||
|
"apache-arrow-old": "npm:apache-arrow@13.0.0",
|
||||||
"cargo-cp-artifact": "^0.1",
|
"cargo-cp-artifact": "^0.1",
|
||||||
"chai": "^4.3.7",
|
"chai": "^4.3.7",
|
||||||
"chai-as-promised": "^7.1.1",
|
"chai-as-promised": "^7.1.1",
|
||||||
@@ -62,15 +61,19 @@
|
|||||||
"ts-node-dev": "^2.0.0",
|
"ts-node-dev": "^2.0.0",
|
||||||
"typedoc": "^0.24.7",
|
"typedoc": "^0.24.7",
|
||||||
"typedoc-plugin-markdown": "^3.15.3",
|
"typedoc-plugin-markdown": "^3.15.3",
|
||||||
"typescript": "*",
|
"typescript": "^5.1.0",
|
||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.4.6",
|
"@lancedb/vectordb-darwin-arm64": "0.12.0",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.4.6",
|
"@lancedb/vectordb-darwin-x64": "0.12.0",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.4.6",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.4.6",
|
"@lancedb/vectordb-linux-x64-gnu": "0.12.0",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.4.6"
|
"@lancedb/vectordb-win32-x64-msvc": "0.12.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
|
"apache-arrow": "^14.0.2"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"../node/node_modules/apache-arrow": {
|
"../node/node_modules/apache-arrow": {
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ Supported parameters (to be passed in `create` method) are:
|
|||||||
|
|
||||||
| Parameter | Type | Default Value | Description |
|
| Parameter | Type | Default Value | Description |
|
||||||
|---|---|--------|---------|
|
|---|---|--------|---------|
|
||||||
| `name` | `str` | `"voyage-3"` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
|
| `name` | `str` | `None` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
|
||||||
| `input_type` | `str` | `None` | Type of the input text. Default to None. Other options: query, document. |
|
| `input_type` | `str` | `None` | Type of the input text. Default to None. Other options: query, document. |
|
||||||
| `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |
|
| `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |
|
||||||
|
|
||||||
|
|||||||
@@ -53,6 +53,7 @@ These functions are registered by default to handle text embeddings.
|
|||||||
| [**Jina Embeddings**](available_embedding_models/text_embedding_functions/jina_embedding.md "jina") | 🔗 World-class embedding models to improve your search and RAG systems. You will need **jina api key**. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/jina.png" alt="Jina Icon" width="90" height="35">](available_embedding_models/text_embedding_functions/jina_embedding.md) |
|
| [**Jina Embeddings**](available_embedding_models/text_embedding_functions/jina_embedding.md "jina") | 🔗 World-class embedding models to improve your search and RAG systems. You will need **jina api key**. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/jina.png" alt="Jina Icon" width="90" height="35">](available_embedding_models/text_embedding_functions/jina_embedding.md) |
|
||||||
| [ **AWS Bedrock Functions**](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md "bedrock-text") | ☁️ AWS Bedrock supports multiple base models for generating text embeddings. You need to setup the AWS credentials to use this embedding function. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/aws_bedrock.png" alt="AWS Bedrock Icon" width="120" height="35">](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md) |
|
| [ **AWS Bedrock Functions**](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md "bedrock-text") | ☁️ AWS Bedrock supports multiple base models for generating text embeddings. You need to setup the AWS credentials to use this embedding function. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/aws_bedrock.png" alt="AWS Bedrock Icon" width="120" height="35">](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md) |
|
||||||
| [**IBM Watsonx.ai**](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md "watsonx") | 💡 Generate text embeddings using IBM's watsonx.ai platform. **Note**: watsonx.ai library is an optional dependency. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/watsonx.png" alt="Watsonx Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md) |
|
| [**IBM Watsonx.ai**](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md "watsonx") | 💡 Generate text embeddings using IBM's watsonx.ai platform. **Note**: watsonx.ai library is an optional dependency. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/watsonx.png" alt="Watsonx Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md) |
|
||||||
|
| [**VoyageAI Embeddings**](available_embedding_models/text_embedding_functions/voyageai_embedding.md "voyageai") | 🌕 Voyage AI provides cutting-edge embedding and rerankers. This will help you get started with **VoyageAI** embedding models using LanceDB. Using voyageai API requires voyageai package. Install it via `pip`. | [<img src="https://www.voyageai.com/logo.svg" alt="VoyageAI Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/voyageai_embedding.md) |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -66,6 +67,7 @@ These functions are registered by default to handle text embeddings.
|
|||||||
[jina-key]: "jina"
|
[jina-key]: "jina"
|
||||||
[aws-key]: "bedrock-text"
|
[aws-key]: "bedrock-text"
|
||||||
[watsonx-key]: "watsonx"
|
[watsonx-key]: "watsonx"
|
||||||
|
[voyageai-key]: "voyageai"
|
||||||
|
|
||||||
|
|
||||||
## Multi-modal Embedding Functions🖼️
|
## Multi-modal Embedding Functions🖼️
|
||||||
|
|||||||
@@ -160,3 +160,32 @@ To search for a phrase, the index must be created with `with_position=True`:
|
|||||||
table.create_fts_index("text", use_tantivy=False, with_position=True)
|
table.create_fts_index("text", use_tantivy=False, with_position=True)
|
||||||
```
|
```
|
||||||
This will allow you to search for phrases, but it will also significantly increase the index size and indexing time.
|
This will allow you to search for phrases, but it will also significantly increase the index size and indexing time.
|
||||||
|
|
||||||
|
|
||||||
|
## Incremental indexing
|
||||||
|
|
||||||
|
LanceDB supports incremental indexing, which means you can add new records to the table without reindexing the entire table.
|
||||||
|
|
||||||
|
This can make the query more efficient, especially when the table is large and the new records are relatively small.
|
||||||
|
|
||||||
|
=== "Python"
|
||||||
|
|
||||||
|
```python
|
||||||
|
table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])
|
||||||
|
table.optimize()
|
||||||
|
```
|
||||||
|
|
||||||
|
=== "TypeScript"
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
await tbl.add([{ vector: [3.1, 4.1], text: "Frodo was a happy puppy" }]);
|
||||||
|
await tbl.optimize();
|
||||||
|
```
|
||||||
|
|
||||||
|
=== "Rust"
|
||||||
|
|
||||||
|
```rust
|
||||||
|
let more_data: Box<dyn RecordBatchReader + Send> = create_some_records()?;
|
||||||
|
tbl.add(more_data).execute().await?;
|
||||||
|
tbl.optimize(OptimizeAction::All).execute().await?;
|
||||||
|
```
|
||||||
|
|||||||
@@ -274,7 +274,7 @@ table = db.create_table(table_name, schema=Content)
|
|||||||
|
|
||||||
Sometimes your data model may contain nested objects.
|
Sometimes your data model may contain nested objects.
|
||||||
For example, you may want to store the document string
|
For example, you may want to store the document string
|
||||||
and the document soure name as a nested Document object:
|
and the document source name as a nested Document object:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
class Document(BaseModel):
|
class Document(BaseModel):
|
||||||
@@ -466,7 +466,7 @@ You can create an empty table for scenarios where you want to add data to the ta
|
|||||||
|
|
||||||
## Adding to a table
|
## Adding to a table
|
||||||
|
|
||||||
After a table has been created, you can always add more data to it usind the `add` method
|
After a table has been created, you can always add more data to it using the `add` method
|
||||||
|
|
||||||
=== "Python"
|
=== "Python"
|
||||||
You can add any of the valid data structures accepted by LanceDB table, i.e, `dict`, `list[dict]`, `pd.DataFrame`, or `Iterator[pa.RecordBatch]`. Below are some examples.
|
You can add any of the valid data structures accepted by LanceDB table, i.e, `dict`, `list[dict]`, `pd.DataFrame`, or `Iterator[pa.RecordBatch]`. Below are some examples.
|
||||||
@@ -535,7 +535,7 @@ After a table has been created, you can always add more data to it usind the `ad
|
|||||||
```
|
```
|
||||||
|
|
||||||
??? "Ingesting Pydantic models with LanceDB embedding API"
|
??? "Ingesting Pydantic models with LanceDB embedding API"
|
||||||
When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` feild as None to allow LanceDB to automatically vectorize the data.
|
When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` field as None to allow LanceDB to automatically vectorize the data.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import lancedb
|
import lancedb
|
||||||
@@ -880,4 +880,4 @@ There are three possible settings for `read_consistency_interval`:
|
|||||||
|
|
||||||
Learn the best practices on creating an ANN index and getting the most out of it.
|
Learn the best practices on creating an ANN index and getting the most out of it.
|
||||||
|
|
||||||
[^1]: The `vectordb` package is a legacy package that is deprecated in favor of `@lancedb/lancedb`. The `vectordb` package will continue to receive bug fixes and security updates until September 2024. We recommend all new projects use `@lancedb/lancedb`. See the [migration guide](migration.md) for more information.
|
[^1]: The `vectordb` package is a legacy package that is deprecated in favor of `@lancedb/lancedb`. The `vectordb` package will continue to receive bug fixes and security updates until September 2024. We recommend all new projects use `@lancedb/lancedb`. See the [migration guide](../migration.md) for more information.
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ LanceDB comes with some built-in rerankers. Some of the rerankers that are avail
|
|||||||
| `CrossEncoderReranker` | Uses a cross-encoder model to rerank search results | Vector, FTS, Hybrid |
|
| `CrossEncoderReranker` | Uses a cross-encoder model to rerank search results | Vector, FTS, Hybrid |
|
||||||
| `ColbertReranker` | Uses a colbert model to rerank search results | Vector, FTS, Hybrid |
|
| `ColbertReranker` | Uses a colbert model to rerank search results | Vector, FTS, Hybrid |
|
||||||
| `OpenaiReranker`(Experimental) | Uses OpenAI's chat model to rerank search results | Vector, FTS, Hybrid |
|
| `OpenaiReranker`(Experimental) | Uses OpenAI's chat model to rerank search results | Vector, FTS, Hybrid |
|
||||||
|
| `VoyageAIReranker` | Uses voyageai Reranker API to rerank results | Vector, FTS, Hybrid |
|
||||||
|
|
||||||
|
|
||||||
## Using a Reranker
|
## Using a Reranker
|
||||||
@@ -73,6 +74,7 @@ LanceDB comes with some built-in rerankers. Here are some of the rerankers that
|
|||||||
- [Jina Reranker](./jina.md)
|
- [Jina Reranker](./jina.md)
|
||||||
- [AnswerDotAI Rerankers](./answerdotai.md)
|
- [AnswerDotAI Rerankers](./answerdotai.md)
|
||||||
- [Reciprocal Rank Fusion Reranker](./rrf.md)
|
- [Reciprocal Rank Fusion Reranker](./rrf.md)
|
||||||
|
- [VoyageAI Reranker](./voyageai.md)
|
||||||
|
|
||||||
## Creating Custom Rerankers
|
## Creating Custom Rerankers
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.13.0-beta.2</version>
|
<version>0.13.0-final.0</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.13.0-beta.2</version>
|
<version>0.13.0-final.0</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<name>LanceDB Parent</name>
|
<name>LanceDB Parent</name>
|
||||||
|
|||||||
82
node/package-lock.json
generated
82
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.13.0-beta.2",
|
"version": "0.13.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.13.0-beta.2",
|
"version": "0.13.0",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -52,12 +52,12 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.13.0-beta.2",
|
"@lancedb/vectordb-darwin-arm64": "0.13.0",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.13.0-beta.2",
|
"@lancedb/vectordb-darwin-x64": "0.13.0",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.13.0-beta.2",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.13.0",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.13.0-beta.2",
|
"@lancedb/vectordb-linux-x64-gnu": "0.13.0",
|
||||||
"@lancedb/vectordb-win32-arm64-msvc": "0.13.0-beta.2",
|
"@lancedb/vectordb-win32-arm64-msvc": "0.13.0",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.13.0-beta.2"
|
"@lancedb/vectordb-win32-x64-msvc": "0.13.0"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
@@ -327,6 +327,66 @@
|
|||||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||||
|
"version": "0.13.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.13.0.tgz",
|
||||||
|
"integrity": "sha512-8hdcjkRmgrdQYf1jN+DyZae40LIv8UUfnWy70Uid5qy63sSvRW/+MvIdqIPFr9QlLUXmpyyQuX0y3bZhUR99cQ==",
|
||||||
|
"cpu": [
|
||||||
|
"arm64"
|
||||||
|
],
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"darwin"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||||
|
"version": "0.13.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.13.0.tgz",
|
||||||
|
"integrity": "sha512-fWzAY4l5SQtNfMYh80v+M66ugZHhdxbkpk5mNEv6Zsug3DL6kRj3Uv31/i0wgzY6F5G3LUlbjZerN+eTnDLwOw==",
|
||||||
|
"cpu": [
|
||||||
|
"x64"
|
||||||
|
],
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"darwin"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||||
|
"version": "0.13.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.13.0.tgz",
|
||||||
|
"integrity": "sha512-ltwAT9baOSuR5YiGykQXPC8/HGYF13vpI47qxhP9yfgiz9pA8EUn8p8YrBRzq7J4DIZ4b8JSVDXQnMIqEtB4Kg==",
|
||||||
|
"cpu": [
|
||||||
|
"arm64"
|
||||||
|
],
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"linux"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||||
|
"version": "0.13.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.13.0.tgz",
|
||||||
|
"integrity": "sha512-MiT/RBlMPGGRh7BX+MXwRuNiiUnKmuDcHH8nm88IH28T7TQxXIbA9w6UpSg5m9f3DgKQI2K8oLi29oKIB8ZwDQ==",
|
||||||
|
"cpu": [
|
||||||
|
"x64"
|
||||||
|
],
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"linux"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||||
|
"version": "0.13.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.13.0.tgz",
|
||||||
|
"integrity": "sha512-SovP/hwWYLJIy65DKbVuXlBPTb/nwvVpTO6dh9zRch+L5ek6JmVAkwsfeTS2p5bMa8VPujsCXYUAVuCDEJU8wg==",
|
||||||
|
"cpu": [
|
||||||
|
"x64"
|
||||||
|
],
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"win32"
|
||||||
|
]
|
||||||
|
},
|
||||||
"node_modules/@neon-rs/cli": {
|
"node_modules/@neon-rs/cli": {
|
||||||
"version": "0.0.160",
|
"version": "0.0.160",
|
||||||
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
|
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
|
||||||
@@ -1441,9 +1501,9 @@
|
|||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/cross-spawn": {
|
"node_modules/cross-spawn": {
|
||||||
"version": "7.0.3",
|
"version": "7.0.6",
|
||||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
|
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
|
||||||
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
|
"integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"path-key": "^3.1.0",
|
"path-key": "^3.1.0",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.13.0-beta.2",
|
"version": "0.13.0",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
@@ -89,11 +89,11 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.13.0-beta.2",
|
"@lancedb/vectordb-darwin-arm64": "0.13.0",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.13.0-beta.2",
|
"@lancedb/vectordb-darwin-x64": "0.13.0",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.13.0-beta.2",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.13.0",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.13.0-beta.2",
|
"@lancedb/vectordb-linux-x64-gnu": "0.13.0",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.13.0-beta.2",
|
"@lancedb/vectordb-win32-x64-msvc": "0.13.0",
|
||||||
"@lancedb/vectordb-win32-arm64-msvc": "0.13.0-beta.2"
|
"@lancedb/vectordb-win32-arm64-msvc": "0.13.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.13.0-beta.2"
|
version = "0.13.0"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|||||||
@@ -477,6 +477,54 @@ describe("When creating an index", () => {
|
|||||||
expect(rst.numRows).toBe(1);
|
expect(rst.numRows).toBe(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("should create and search IVF_HNSW indices", async () => {
|
||||||
|
await tbl.createIndex("vec", {
|
||||||
|
config: Index.hnswSq(),
|
||||||
|
});
|
||||||
|
|
||||||
|
// check index directory
|
||||||
|
const indexDir = path.join(tmpDir.name, "test.lance", "_indices");
|
||||||
|
expect(fs.readdirSync(indexDir)).toHaveLength(1);
|
||||||
|
const indices = await tbl.listIndices();
|
||||||
|
expect(indices.length).toBe(1);
|
||||||
|
expect(indices[0]).toEqual({
|
||||||
|
name: "vec_idx",
|
||||||
|
indexType: "IvfHnswSq",
|
||||||
|
columns: ["vec"],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Search without specifying the column
|
||||||
|
let rst = await tbl
|
||||||
|
.query()
|
||||||
|
.limit(2)
|
||||||
|
.nearestTo(queryVec)
|
||||||
|
.distanceType("dot")
|
||||||
|
.toArrow();
|
||||||
|
expect(rst.numRows).toBe(2);
|
||||||
|
|
||||||
|
// Search using `vectorSearch`
|
||||||
|
rst = await tbl.vectorSearch(queryVec).limit(2).toArrow();
|
||||||
|
expect(rst.numRows).toBe(2);
|
||||||
|
|
||||||
|
// Search with specifying the column
|
||||||
|
const rst2 = await tbl
|
||||||
|
.query()
|
||||||
|
.limit(2)
|
||||||
|
.nearestTo(queryVec)
|
||||||
|
.column("vec")
|
||||||
|
.toArrow();
|
||||||
|
expect(rst2.numRows).toBe(2);
|
||||||
|
expect(rst.toString()).toEqual(rst2.toString());
|
||||||
|
|
||||||
|
// test offset
|
||||||
|
rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
|
||||||
|
expect(rst.numRows).toBe(1);
|
||||||
|
|
||||||
|
// test ef
|
||||||
|
rst = await tbl.query().limit(2).nearestTo(queryVec).ef(100).toArrow();
|
||||||
|
expect(rst.numRows).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
it("should be able to query unindexed data", async () => {
|
it("should be able to query unindexed data", async () => {
|
||||||
await tbl.createIndex("vec");
|
await tbl.createIndex("vec");
|
||||||
await tbl.add([
|
await tbl.add([
|
||||||
|
|||||||
@@ -385,6 +385,20 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the number of candidates to consider during the search
|
||||||
|
*
|
||||||
|
* This argument is only used when the vector column has an HNSW index.
|
||||||
|
* If there is no index then this value is ignored.
|
||||||
|
*
|
||||||
|
* Increasing this value will increase the recall of your query but will
|
||||||
|
* also increase the latency of your query. The default value is 1.5*limit.
|
||||||
|
*/
|
||||||
|
ef(ef: number): VectorQuery {
|
||||||
|
super.doCall((inner) => inner.ef(ef));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the vector column to query
|
* Set the vector column to query
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.13.0-beta.2",
|
"version": "0.13.0",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.13.0-beta.2",
|
"version": "0.13.0",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.13.0-beta.2",
|
"version": "0.13.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.13.0-beta.2",
|
"version": "0.13.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.13.0-beta.2",
|
"version": "0.13.0",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.13.0-beta.2",
|
"version": "0.13.0",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
10
nodejs/package-lock.json
generated
10
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.13.0-beta.1",
|
"version": "0.13.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.13.0-beta.1",
|
"version": "0.13.0",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -6052,9 +6052,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/cross-spawn": {
|
"node_modules/cross-spawn": {
|
||||||
"version": "7.0.3",
|
"version": "7.0.6",
|
||||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
|
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
|
||||||
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
|
"integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
|
||||||
"devOptional": true,
|
"devOptional": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"path-key": "^3.1.0",
|
"path-key": "^3.1.0",
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"vector database",
|
"vector database",
|
||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"version": "0.13.0-beta.2",
|
"version": "0.13.0",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -167,6 +167,11 @@ impl VectorQuery {
|
|||||||
self.inner = self.inner.clone().nprobes(nprobe as usize);
|
self.inner = self.inner.clone().nprobes(nprobe as usize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[napi]
|
||||||
|
pub fn ef(&mut self, ef: u32) {
|
||||||
|
self.inner = self.inner.clone().ef(ef as usize);
|
||||||
|
}
|
||||||
|
|
||||||
#[napi]
|
#[napi]
|
||||||
pub fn bypass_vector_index(&mut self) {
|
pub fn bypass_vector_index(&mut self) {
|
||||||
self.inner = self.inner.clone().bypass_vector_index()
|
self.inner = self.inner.clone().bypass_vector_index()
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ crate-type = ["cdylib"]
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
arrow = { version = "52.1", features = ["pyarrow"] }
|
arrow = { version = "52.1", features = ["pyarrow"] }
|
||||||
lancedb = { path = "../rust/lancedb" }
|
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||||
env_logger.workspace = true
|
env_logger.workspace = true
|
||||||
pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38", "gil-refs"] }
|
pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38", "gil-refs"] }
|
||||||
# Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
|
# Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
|
||||||
@@ -33,6 +33,11 @@ pyo3-build-config = { version = "0.20.3", features = [
|
|||||||
] }
|
] }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["remote"]
|
default = ["default-tls", "remote"]
|
||||||
fp16kernels = ["lancedb/fp16kernels"]
|
fp16kernels = ["lancedb/fp16kernels"]
|
||||||
remote = ["lancedb/remote"]
|
remote = ["lancedb/remote"]
|
||||||
|
|
||||||
|
# TLS
|
||||||
|
default-tls = ["lancedb/default-tls"]
|
||||||
|
native-tls = ["lancedb/native-tls"]
|
||||||
|
rustls-tls = ["lancedb/rustls-tls"]
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ name = "lancedb"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"nest-asyncio~=1.0",
|
"nest-asyncio~=1.0",
|
||||||
"pylance==0.19.2",
|
"pylance==0.19.3b1",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
"pydantic>=1.10",
|
"pydantic>=1.10",
|
||||||
"packaging",
|
"packaging",
|
||||||
|
|||||||
@@ -131,6 +131,8 @@ class Query(pydantic.BaseModel):
|
|||||||
|
|
||||||
fast_search: bool = False
|
fast_search: bool = False
|
||||||
|
|
||||||
|
ef: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
class LanceQueryBuilder(ABC):
|
class LanceQueryBuilder(ABC):
|
||||||
"""An abstract query builder. Subclasses are defined for vector search,
|
"""An abstract query builder. Subclasses are defined for vector search,
|
||||||
@@ -257,6 +259,7 @@ class LanceQueryBuilder(ABC):
|
|||||||
self._with_row_id = False
|
self._with_row_id = False
|
||||||
self._vector = None
|
self._vector = None
|
||||||
self._text = None
|
self._text = None
|
||||||
|
self._ef = None
|
||||||
|
|
||||||
@deprecation.deprecated(
|
@deprecation.deprecated(
|
||||||
deprecated_in="0.3.1",
|
deprecated_in="0.3.1",
|
||||||
@@ -638,6 +641,28 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
self._nprobes = nprobes
|
self._nprobes = nprobes
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def ef(self, ef: int) -> LanceVectorQueryBuilder:
|
||||||
|
"""Set the number of candidates to consider during search.
|
||||||
|
|
||||||
|
Higher values will yield better recall (more likely to find vectors if
|
||||||
|
they exist) at the expense of latency.
|
||||||
|
|
||||||
|
This only applies to the HNSW-related index.
|
||||||
|
The default value is 1.5 * limit.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
ef: int
|
||||||
|
The number of candidates to consider during search.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
LanceVectorQueryBuilder
|
||||||
|
The LanceQueryBuilder object.
|
||||||
|
"""
|
||||||
|
self._ef = ef
|
||||||
|
return self
|
||||||
|
|
||||||
def refine_factor(self, refine_factor: int) -> LanceVectorQueryBuilder:
|
def refine_factor(self, refine_factor: int) -> LanceVectorQueryBuilder:
|
||||||
"""Set the refine factor to use, increasing the number of vectors sampled.
|
"""Set the refine factor to use, increasing the number of vectors sampled.
|
||||||
|
|
||||||
@@ -700,6 +725,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
with_row_id=self._with_row_id,
|
with_row_id=self._with_row_id,
|
||||||
offset=self._offset,
|
offset=self._offset,
|
||||||
fast_search=self._fast_search,
|
fast_search=self._fast_search,
|
||||||
|
ef=self._ef,
|
||||||
)
|
)
|
||||||
result_set = self._table._execute_query(query, batch_size)
|
result_set = self._table._execute_query(query, batch_size)
|
||||||
if self._reranker is not None:
|
if self._reranker is not None:
|
||||||
@@ -1071,6 +1097,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
|||||||
self._vector_query.nprobes(self._nprobes)
|
self._vector_query.nprobes(self._nprobes)
|
||||||
if self._refine_factor:
|
if self._refine_factor:
|
||||||
self._vector_query.refine_factor(self._refine_factor)
|
self._vector_query.refine_factor(self._refine_factor)
|
||||||
|
if self._ef:
|
||||||
|
self._vector_query.ef(self._ef)
|
||||||
|
|
||||||
with ThreadPoolExecutor() as executor:
|
with ThreadPoolExecutor() as executor:
|
||||||
fts_future = executor.submit(self._fts_query.with_row_id(True).to_arrow)
|
fts_future = executor.submit(self._fts_query.with_row_id(True).to_arrow)
|
||||||
@@ -1197,6 +1225,29 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
|||||||
self._nprobes = nprobes
|
self._nprobes = nprobes
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def ef(self, ef: int) -> LanceHybridQueryBuilder:
|
||||||
|
"""
|
||||||
|
Set the number of candidates to consider during search.
|
||||||
|
|
||||||
|
Higher values will yield better recall (more likely to find vectors if
|
||||||
|
they exist) at the expense of latency.
|
||||||
|
|
||||||
|
This only applies to the HNSW-related index.
|
||||||
|
The default value is 1.5 * limit.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
ef: int
|
||||||
|
The number of candidates to consider during search.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
LanceHybridQueryBuilder
|
||||||
|
The LanceHybridQueryBuilder object.
|
||||||
|
"""
|
||||||
|
self._ef = ef
|
||||||
|
return self
|
||||||
|
|
||||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
|
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
|
||||||
"""Set the distance metric to use.
|
"""Set the distance metric to use.
|
||||||
|
|
||||||
@@ -1495,7 +1546,8 @@ class AsyncQuery(AsyncQueryBase):
|
|||||||
return pa.array(vec)
|
return pa.array(vec)
|
||||||
|
|
||||||
def nearest_to(
|
def nearest_to(
|
||||||
self, query_vector: Optional[Union[VEC, Tuple, List[VEC]]] = None
|
self,
|
||||||
|
query_vector: Union[VEC, Tuple, List[VEC]],
|
||||||
) -> AsyncVectorQuery:
|
) -> AsyncVectorQuery:
|
||||||
"""
|
"""
|
||||||
Find the nearest vectors to the given query vector.
|
Find the nearest vectors to the given query vector.
|
||||||
@@ -1542,6 +1594,9 @@ class AsyncQuery(AsyncQueryBase):
|
|||||||
will be added to the results. This column will contain the index of the
|
will be added to the results. This column will contain the index of the
|
||||||
query vector that the result is nearest to.
|
query vector that the result is nearest to.
|
||||||
"""
|
"""
|
||||||
|
if query_vector is None:
|
||||||
|
raise ValueError("query_vector can not be None")
|
||||||
|
|
||||||
if (
|
if (
|
||||||
isinstance(query_vector, list)
|
isinstance(query_vector, list)
|
||||||
and len(query_vector) > 0
|
and len(query_vector) > 0
|
||||||
@@ -1618,7 +1673,7 @@ class AsyncVectorQuery(AsyncQueryBase):
|
|||||||
"""
|
"""
|
||||||
Set the number of partitions to search (probe)
|
Set the number of partitions to search (probe)
|
||||||
|
|
||||||
This argument is only used when the vector column has an IVF PQ index.
|
This argument is only used when the vector column has an IVF-based index.
|
||||||
If there is no index then this value is ignored.
|
If there is no index then this value is ignored.
|
||||||
|
|
||||||
The IVF stage of IVF PQ divides the input into partitions (clusters) of
|
The IVF stage of IVF PQ divides the input into partitions (clusters) of
|
||||||
@@ -1640,6 +1695,21 @@ class AsyncVectorQuery(AsyncQueryBase):
|
|||||||
self._inner.nprobes(nprobes)
|
self._inner.nprobes(nprobes)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def ef(self, ef: int) -> AsyncVectorQuery:
|
||||||
|
"""
|
||||||
|
Set the number of candidates to consider during search
|
||||||
|
|
||||||
|
This argument is only used when the vector column has an HNSW index.
|
||||||
|
If there is no index then this value is ignored.
|
||||||
|
|
||||||
|
Increasing this value will increase the recall of your query but will also
|
||||||
|
increase the latency of your query. The default value is 1.5 * limit. This
|
||||||
|
default is good for many cases but the best value to use will depend on your
|
||||||
|
data and the recall that you need to achieve.
|
||||||
|
"""
|
||||||
|
self._inner.ef(ef)
|
||||||
|
return self
|
||||||
|
|
||||||
def refine_factor(self, refine_factor: int) -> AsyncVectorQuery:
|
def refine_factor(self, refine_factor: int) -> AsyncVectorQuery:
|
||||||
"""
|
"""
|
||||||
A multiplier to control how many additional rows are taken during the refine
|
A multiplier to control how many additional rows are taken during the refine
|
||||||
|
|||||||
@@ -86,6 +86,12 @@ class RemoteTable(Table):
|
|||||||
"""to_pandas() is not yet supported on LanceDB cloud."""
|
"""to_pandas() is not yet supported on LanceDB cloud."""
|
||||||
return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
|
return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
|
||||||
|
|
||||||
|
def checkout(self, version):
|
||||||
|
return self._loop.run_until_complete(self._table.checkout(version))
|
||||||
|
|
||||||
|
def checkout_latest(self):
|
||||||
|
return self._loop.run_until_complete(self._table.checkout_latest())
|
||||||
|
|
||||||
def list_indices(self):
|
def list_indices(self):
|
||||||
"""List all the indices on the table"""
|
"""List all the indices on the table"""
|
||||||
return self._loop.run_until_complete(self._table.list_indices())
|
return self._loop.run_until_complete(self._table.list_indices())
|
||||||
|
|||||||
@@ -1012,6 +1012,18 @@ class Table(ABC):
|
|||||||
The names of the columns to drop.
|
The names of the columns to drop.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def checkout(self):
|
||||||
|
"""
|
||||||
|
TODO comments
|
||||||
|
"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def checkout_latest(self):
|
||||||
|
"""
|
||||||
|
TODO comments
|
||||||
|
"""
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def _dataset_uri(self) -> str:
|
def _dataset_uri(self) -> str:
|
||||||
return _table_uri(self._conn.uri, self.name)
|
return _table_uri(self._conn.uri, self.name)
|
||||||
@@ -1959,6 +1971,7 @@ class LanceTable(Table):
|
|||||||
"metric": query.metric,
|
"metric": query.metric,
|
||||||
"nprobes": query.nprobes,
|
"nprobes": query.nprobes,
|
||||||
"refine_factor": query.refine_factor,
|
"refine_factor": query.refine_factor,
|
||||||
|
"ef": query.ef,
|
||||||
}
|
}
|
||||||
return ds.scanner(
|
return ds.scanner(
|
||||||
columns=query.columns,
|
columns=query.columns,
|
||||||
@@ -2697,7 +2710,7 @@ class AsyncTable:
|
|||||||
|
|
||||||
def vector_search(
|
def vector_search(
|
||||||
self,
|
self,
|
||||||
query_vector: Optional[Union[VEC, Tuple]] = None,
|
query_vector: Union[VEC, Tuple],
|
||||||
) -> AsyncVectorQuery:
|
) -> AsyncVectorQuery:
|
||||||
"""
|
"""
|
||||||
Search the table with a given query vector.
|
Search the table with a given query vector.
|
||||||
@@ -2736,6 +2749,8 @@ class AsyncTable:
|
|||||||
async_query = async_query.refine_factor(query.refine_factor)
|
async_query = async_query.refine_factor(query.refine_factor)
|
||||||
if query.vector_column:
|
if query.vector_column:
|
||||||
async_query = async_query.column(query.vector_column)
|
async_query = async_query.column(query.vector_column)
|
||||||
|
if query.ef:
|
||||||
|
async_query = async_query.ef(query.ef)
|
||||||
|
|
||||||
if not query.prefilter:
|
if not query.prefilter:
|
||||||
async_query = async_query.postfilter()
|
async_query = async_query.postfilter()
|
||||||
|
|||||||
@@ -1,21 +1,9 @@
|
|||||||
# Copyright 2023 LanceDB Developers
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import unittest.mock as mock
|
import unittest.mock as mock
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import lance
|
|
||||||
import lancedb
|
import lancedb
|
||||||
from lancedb.index import IvfPq
|
from lancedb.index import IvfPq
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -23,41 +11,15 @@ import pandas.testing as tm
|
|||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from lancedb.db import LanceDBConnection
|
|
||||||
from lancedb.pydantic import LanceModel, Vector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
from lancedb.query import AsyncQueryBase, LanceVectorQueryBuilder, Query
|
from lancedb.query import AsyncQueryBase, LanceVectorQueryBuilder, Query
|
||||||
from lancedb.table import AsyncTable, LanceTable
|
from lancedb.table import AsyncTable, LanceTable
|
||||||
|
|
||||||
|
|
||||||
class MockTable:
|
@pytest.fixture(scope="module")
|
||||||
def __init__(self, tmp_path):
|
def table(tmpdir_factory) -> lancedb.table.Table:
|
||||||
self.uri = tmp_path
|
tmp_path = str(tmpdir_factory.mktemp("data"))
|
||||||
self._conn = LanceDBConnection(self.uri)
|
db = lancedb.connect(tmp_path)
|
||||||
|
|
||||||
def to_lance(self):
|
|
||||||
return lance.dataset(self.uri)
|
|
||||||
|
|
||||||
def _execute_query(self, query, batch_size: Optional[int] = None):
|
|
||||||
ds = self.to_lance()
|
|
||||||
return ds.scanner(
|
|
||||||
columns=query.columns,
|
|
||||||
filter=query.filter,
|
|
||||||
prefilter=query.prefilter,
|
|
||||||
nearest={
|
|
||||||
"column": query.vector_column,
|
|
||||||
"q": query.vector,
|
|
||||||
"k": query.k,
|
|
||||||
"metric": query.metric,
|
|
||||||
"nprobes": query.nprobes,
|
|
||||||
"refine_factor": query.refine_factor,
|
|
||||||
},
|
|
||||||
batch_size=batch_size,
|
|
||||||
offset=query.offset,
|
|
||||||
).to_reader()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def table(tmp_path) -> MockTable:
|
|
||||||
df = pa.table(
|
df = pa.table(
|
||||||
{
|
{
|
||||||
"vector": pa.array(
|
"vector": pa.array(
|
||||||
@@ -68,8 +30,7 @@ def table(tmp_path) -> MockTable:
|
|||||||
"float_field": pa.array([1.0, 2.0]),
|
"float_field": pa.array([1.0, 2.0]),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
lance.write_dataset(df, tmp_path)
|
return db.create_table("test", df)
|
||||||
return MockTable(tmp_path)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest_asyncio.fixture
|
@pytest_asyncio.fixture
|
||||||
@@ -126,6 +87,12 @@ def test_query_builder(table):
|
|||||||
assert all(np.array(rs[0]["vector"]) == [1, 2])
|
assert all(np.array(rs[0]["vector"]) == [1, 2])
|
||||||
|
|
||||||
|
|
||||||
|
def test_with_row_id(table: lancedb.table.Table):
|
||||||
|
rs = table.search().with_row_id(True).to_arrow()
|
||||||
|
assert "_rowid" in rs.column_names
|
||||||
|
assert rs["_rowid"].to_pylist() == [0, 1]
|
||||||
|
|
||||||
|
|
||||||
def test_vector_query_with_no_limit(table):
|
def test_vector_query_with_no_limit(table):
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
LanceVectorQueryBuilder(table, [0, 0], "vector").limit(0).select(
|
LanceVectorQueryBuilder(table, [0, 0], "vector").limit(0).select(
|
||||||
@@ -365,6 +332,12 @@ async def test_query_to_pandas_async(table_async: AsyncTable):
|
|||||||
assert df.shape == (0, 4)
|
assert df.shape == (0, 4)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_none_query(table_async: AsyncTable):
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
await table_async.query().nearest_to(None).to_arrow()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_fast_search_async(tmp_path):
|
async def test_fast_search_async(tmp_path):
|
||||||
db = await lancedb.connect_async(tmp_path)
|
db = await lancedb.connect_async(tmp_path)
|
||||||
|
|||||||
@@ -185,6 +185,7 @@ def test_query_sync_minimal():
|
|||||||
"k": 10,
|
"k": 10,
|
||||||
"prefilter": False,
|
"prefilter": False,
|
||||||
"refine_factor": None,
|
"refine_factor": None,
|
||||||
|
"ef": None,
|
||||||
"vector": [1.0, 2.0, 3.0],
|
"vector": [1.0, 2.0, 3.0],
|
||||||
"nprobes": 20,
|
"nprobes": 20,
|
||||||
}
|
}
|
||||||
@@ -223,6 +224,7 @@ def test_query_sync_maximal():
|
|||||||
"refine_factor": 10,
|
"refine_factor": 10,
|
||||||
"vector": [1.0, 2.0, 3.0],
|
"vector": [1.0, 2.0, 3.0],
|
||||||
"nprobes": 5,
|
"nprobes": 5,
|
||||||
|
"ef": None,
|
||||||
"filter": "id > 0",
|
"filter": "id > 0",
|
||||||
"columns": ["id", "name"],
|
"columns": ["id", "name"],
|
||||||
"vector_column": "vector2",
|
"vector_column": "vector2",
|
||||||
@@ -318,6 +320,7 @@ def test_query_sync_hybrid():
|
|||||||
"refine_factor": None,
|
"refine_factor": None,
|
||||||
"vector": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
|
"vector": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
|
||||||
"nprobes": 20,
|
"nprobes": 20,
|
||||||
|
"ef": None,
|
||||||
"with_row_id": True,
|
"with_row_id": True,
|
||||||
}
|
}
|
||||||
return pa.table({"_rowid": [1, 2, 3], "_distance": [0.1, 0.2, 0.3]})
|
return pa.table({"_rowid": [1, 2, 3], "_distance": [0.1, 0.2, 0.3]})
|
||||||
|
|||||||
@@ -195,6 +195,10 @@ impl VectorQuery {
|
|||||||
self.inner = self.inner.clone().nprobes(nprobe as usize);
|
self.inner = self.inner.clone().nprobes(nprobe as usize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn ef(&mut self, ef: u32) {
|
||||||
|
self.inner = self.inner.clone().ef(ef as usize);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn bypass_vector_index(&mut self) {
|
pub fn bypass_vector_index(&mut self) {
|
||||||
self.inner = self.inner.clone().bypass_vector_index()
|
self.inner = self.inner.clone().bypass_vector_index()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.13.0-beta.2"
|
version = "0.13.0"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.13.0-beta.2"
|
version = "0.13.0"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
@@ -48,9 +48,16 @@ async-openai = { version = "0.20.0", optional = true }
|
|||||||
serde_with = { version = "3.8.1" }
|
serde_with = { version = "3.8.1" }
|
||||||
aws-sdk-bedrockruntime = { version = "1.27.0", optional = true }
|
aws-sdk-bedrockruntime = { version = "1.27.0", optional = true }
|
||||||
# For remote feature
|
# For remote feature
|
||||||
reqwest = { version = "0.12.0", features = ["gzip", "json", "stream"], optional = true }
|
reqwest = { version = "0.12.0", default-features = false, features = [
|
||||||
rand = { version = "0.8.3", features = ["small_rng"], optional = true}
|
"charset",
|
||||||
http = { version = "1", optional = true } # Matching what is in reqwest
|
"gzip",
|
||||||
|
"http2",
|
||||||
|
"json",
|
||||||
|
"macos-system-configuration",
|
||||||
|
"stream",
|
||||||
|
], optional = true }
|
||||||
|
rand = { version = "0.8.3", features = ["small_rng"], optional = true }
|
||||||
|
http = { version = "1", optional = true } # Matching what is in reqwest
|
||||||
uuid = { version = "1.7.0", features = ["v4"], optional = true }
|
uuid = { version = "1.7.0", features = ["v4"], optional = true }
|
||||||
polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
|
polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
|
||||||
polars = { version = ">=0.37,<0.40.0", optional = true }
|
polars = { version = ">=0.37,<0.40.0", optional = true }
|
||||||
@@ -75,7 +82,7 @@ http-body = "1" # Matching reqwest
|
|||||||
|
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = ["default-tls"]
|
||||||
remote = ["dep:reqwest", "dep:http", "dep:rand", "dep:uuid"]
|
remote = ["dep:reqwest", "dep:http", "dep:rand", "dep:uuid"]
|
||||||
fp16kernels = ["lance-linalg/fp16kernels"]
|
fp16kernels = ["lance-linalg/fp16kernels"]
|
||||||
s3-test = []
|
s3-test = []
|
||||||
@@ -90,6 +97,11 @@ sentence-transformers = [
|
|||||||
"dep:tokenizers"
|
"dep:tokenizers"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# TLS
|
||||||
|
default-tls = ["reqwest?/default-tls"]
|
||||||
|
native-tls = ["reqwest?/native-tls"]
|
||||||
|
rustls-tls = ["reqwest?/rustls-tls"]
|
||||||
|
|
||||||
[[example]]
|
[[example]]
|
||||||
name = "openai"
|
name = "openai"
|
||||||
required-features = ["openai"]
|
required-features = ["openai"]
|
||||||
|
|||||||
@@ -704,6 +704,9 @@ pub struct VectorQuery {
|
|||||||
// IVF PQ - ANN search.
|
// IVF PQ - ANN search.
|
||||||
pub(crate) query_vector: Vec<Arc<dyn Array>>,
|
pub(crate) query_vector: Vec<Arc<dyn Array>>,
|
||||||
pub(crate) nprobes: usize,
|
pub(crate) nprobes: usize,
|
||||||
|
// The number of candidates to return during the refine step for HNSW,
|
||||||
|
// defaults to 1.5 * limit.
|
||||||
|
pub(crate) ef: Option<usize>,
|
||||||
pub(crate) refine_factor: Option<u32>,
|
pub(crate) refine_factor: Option<u32>,
|
||||||
pub(crate) distance_type: Option<DistanceType>,
|
pub(crate) distance_type: Option<DistanceType>,
|
||||||
/// Default is true. Set to false to enforce a brute force search.
|
/// Default is true. Set to false to enforce a brute force search.
|
||||||
@@ -717,6 +720,7 @@ impl VectorQuery {
|
|||||||
column: None,
|
column: None,
|
||||||
query_vector: Vec::new(),
|
query_vector: Vec::new(),
|
||||||
nprobes: 20,
|
nprobes: 20,
|
||||||
|
ef: None,
|
||||||
refine_factor: None,
|
refine_factor: None,
|
||||||
distance_type: None,
|
distance_type: None,
|
||||||
use_index: true,
|
use_index: true,
|
||||||
@@ -776,6 +780,18 @@ impl VectorQuery {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the number of candidates to return during the refine step for HNSW
|
||||||
|
///
|
||||||
|
/// This argument is only used when the vector column has an HNSW index.
|
||||||
|
/// If there is no index then this value is ignored.
|
||||||
|
///
|
||||||
|
/// Increasing this value will increase the recall of your query but will
|
||||||
|
/// also increase the latency of your query. The default value is 1.5*limit.
|
||||||
|
pub fn ef(mut self, ef: usize) -> Self {
|
||||||
|
self.ef = Some(ef);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// A multiplier to control how many additional rows are taken during the refine step
|
/// A multiplier to control how many additional rows are taken during the refine step
|
||||||
///
|
///
|
||||||
/// This argument is only used when the vector column has an IVF PQ index.
|
/// This argument is only used when the vector column has an IVF PQ index.
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ use lance::dataset::scanner::DatasetRecordBatchStream;
|
|||||||
use lance::dataset::{ColumnAlteration, NewColumnTransform};
|
use lance::dataset::{ColumnAlteration, NewColumnTransform};
|
||||||
use lance_datafusion::exec::OneShotExec;
|
use lance_datafusion::exec::OneShotExec;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tokio::sync::RwLock;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
connection::NoData,
|
connection::NoData,
|
||||||
@@ -43,17 +44,32 @@ pub struct RemoteTable<S: HttpSend = Sender> {
|
|||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
client: RestfulLanceDbClient<S>,
|
client: RestfulLanceDbClient<S>,
|
||||||
name: String,
|
name: String,
|
||||||
|
|
||||||
|
version: RwLock<Option<u64>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<S: HttpSend> RemoteTable<S> {
|
impl<S: HttpSend> RemoteTable<S> {
|
||||||
pub fn new(client: RestfulLanceDbClient<S>, name: String) -> Self {
|
pub fn new(client: RestfulLanceDbClient<S>, name: String) -> Self {
|
||||||
Self { client, name }
|
Self {
|
||||||
|
client,
|
||||||
|
name,
|
||||||
|
version: RwLock::new(None),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn describe(&self) -> Result<TableDescription> {
|
async fn describe(&self) -> Result<TableDescription> {
|
||||||
let request = self
|
let version = self.current_version().await;
|
||||||
|
self.describe_version(version).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_version(&self, version: Option<u64>) -> Result<TableDescription> {
|
||||||
|
let mut request = self
|
||||||
.client
|
.client
|
||||||
.post(&format!("/v1/table/{}/describe/", self.name));
|
.post(&format!("/v1/table/{}/describe/", self.name));
|
||||||
|
|
||||||
|
let body = serde_json::json!({ "version": version });
|
||||||
|
request = request.json(&body);
|
||||||
|
|
||||||
let (request_id, response) = self.client.send(request, true).await?;
|
let (request_id, response) = self.client.send(request, true).await?;
|
||||||
|
|
||||||
let response = self.check_table_response(&request_id, response).await?;
|
let response = self.check_table_response(&request_id, response).await?;
|
||||||
@@ -196,6 +212,7 @@ impl<S: HttpSend> RemoteTable<S> {
|
|||||||
body["prefilter"] = query.base.prefilter.into();
|
body["prefilter"] = query.base.prefilter.into();
|
||||||
body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
|
body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
|
||||||
body["nprobes"] = query.nprobes.into();
|
body["nprobes"] = query.nprobes.into();
|
||||||
|
body["ef"] = query.ef.into();
|
||||||
body["refine_factor"] = query.refine_factor.into();
|
body["refine_factor"] = query.refine_factor.into();
|
||||||
if let Some(vector_column) = query.column.as_ref() {
|
if let Some(vector_column) = query.column.as_ref() {
|
||||||
body["vector_column"] = serde_json::Value::String(vector_column.clone());
|
body["vector_column"] = serde_json::Value::String(vector_column.clone());
|
||||||
@@ -250,6 +267,24 @@ impl<S: HttpSend> RemoteTable<S> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn check_mutable(&self) -> Result<()> {
|
||||||
|
let read_guard = self.version.read().await;
|
||||||
|
match *read_guard {
|
||||||
|
None => Ok(()),
|
||||||
|
Some(version) => Err(Error::NotSupported {
|
||||||
|
message: format!(
|
||||||
|
"Cannot mutate table reference fixed at version {}. Call checkout_latest() to get a mutable table reference.",
|
||||||
|
version
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn current_version(&self) -> Option<u64> {
|
||||||
|
let read_guard = self.version.read().await;
|
||||||
|
*read_guard
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
@@ -277,7 +312,11 @@ mod test_utils {
|
|||||||
T: Into<reqwest::Body>,
|
T: Into<reqwest::Body>,
|
||||||
{
|
{
|
||||||
let client = client_with_handler(handler);
|
let client = client_with_handler(handler);
|
||||||
Self { client, name }
|
Self {
|
||||||
|
client,
|
||||||
|
name,
|
||||||
|
version: RwLock::new(None),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -296,17 +335,30 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
async fn version(&self) -> Result<u64> {
|
async fn version(&self) -> Result<u64> {
|
||||||
self.describe().await.map(|desc| desc.version)
|
self.describe().await.map(|desc| desc.version)
|
||||||
}
|
}
|
||||||
async fn checkout(&self, _version: u64) -> Result<()> {
|
async fn checkout(&self, version: u64) -> Result<()> {
|
||||||
Err(Error::NotSupported {
|
// check that the version exists
|
||||||
message: "checkout is not supported on LanceDB cloud.".into(),
|
self.describe_version(Some(version))
|
||||||
})
|
.await
|
||||||
|
.map_err(|e| match e {
|
||||||
|
// try to map the error to a more user-friendly error telling them
|
||||||
|
// specifically that the version does not exist
|
||||||
|
Error::TableNotFound { name } => Error::TableNotFound {
|
||||||
|
name: format!("{} (version: {})", name, version),
|
||||||
|
},
|
||||||
|
e => e,
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let mut write_guard = self.version.write().await;
|
||||||
|
*write_guard = Some(version);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
async fn checkout_latest(&self) -> Result<()> {
|
async fn checkout_latest(&self) -> Result<()> {
|
||||||
Err(Error::NotSupported {
|
let mut write_guard = self.version.write().await;
|
||||||
message: "checkout is not supported on LanceDB cloud.".into(),
|
*write_guard = None;
|
||||||
})
|
Ok(())
|
||||||
}
|
}
|
||||||
async fn restore(&self) -> Result<()> {
|
async fn restore(&self) -> Result<()> {
|
||||||
|
self.check_mutable().await?;
|
||||||
Err(Error::NotSupported {
|
Err(Error::NotSupported {
|
||||||
message: "restore is not supported on LanceDB cloud.".into(),
|
message: "restore is not supported on LanceDB cloud.".into(),
|
||||||
})
|
})
|
||||||
@@ -320,10 +372,13 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
.client
|
.client
|
||||||
.post(&format!("/v1/table/{}/count_rows/", self.name));
|
.post(&format!("/v1/table/{}/count_rows/", self.name));
|
||||||
|
|
||||||
|
let version = self.current_version().await;
|
||||||
|
|
||||||
if let Some(filter) = filter {
|
if let Some(filter) = filter {
|
||||||
request = request.json(&serde_json::json!({ "predicate": filter }));
|
request = request.json(&serde_json::json!({ "predicate": filter, "version": version }));
|
||||||
} else {
|
} else {
|
||||||
request = request.json(&serde_json::json!({}));
|
let body = serde_json::json!({ "version": version });
|
||||||
|
request = request.json(&body);
|
||||||
}
|
}
|
||||||
|
|
||||||
let (request_id, response) = self.client.send(request, true).await?;
|
let (request_id, response) = self.client.send(request, true).await?;
|
||||||
@@ -343,6 +398,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
add: AddDataBuilder<NoData>,
|
add: AddDataBuilder<NoData>,
|
||||||
data: Box<dyn RecordBatchReader + Send>,
|
data: Box<dyn RecordBatchReader + Send>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
self.check_mutable().await?;
|
||||||
let body = Self::reader_as_body(data)?;
|
let body = Self::reader_as_body(data)?;
|
||||||
let mut request = self
|
let mut request = self
|
||||||
.client
|
.client
|
||||||
@@ -371,7 +427,8 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
) -> Result<Arc<dyn ExecutionPlan>> {
|
) -> Result<Arc<dyn ExecutionPlan>> {
|
||||||
let request = self.client.post(&format!("/v1/table/{}/query/", self.name));
|
let request = self.client.post(&format!("/v1/table/{}/query/", self.name));
|
||||||
|
|
||||||
let body = serde_json::Value::Object(Default::default());
|
let version = self.current_version().await;
|
||||||
|
let body = serde_json::json!({ "version": version });
|
||||||
let bodies = Self::apply_vector_query_params(body, query)?;
|
let bodies = Self::apply_vector_query_params(body, query)?;
|
||||||
|
|
||||||
let mut futures = Vec::with_capacity(bodies.len());
|
let mut futures = Vec::with_capacity(bodies.len());
|
||||||
@@ -406,7 +463,8 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
.post(&format!("/v1/table/{}/query/", self.name))
|
.post(&format!("/v1/table/{}/query/", self.name))
|
||||||
.header(CONTENT_TYPE, JSON_CONTENT_TYPE);
|
.header(CONTENT_TYPE, JSON_CONTENT_TYPE);
|
||||||
|
|
||||||
let mut body = serde_json::Value::Object(Default::default());
|
let version = self.current_version().await;
|
||||||
|
let mut body = serde_json::json!({ "version": version });
|
||||||
Self::apply_query_params(&mut body, query)?;
|
Self::apply_query_params(&mut body, query)?;
|
||||||
// Empty vector can be passed if no vector search is performed.
|
// Empty vector can be passed if no vector search is performed.
|
||||||
body["vector"] = serde_json::Value::Array(Vec::new());
|
body["vector"] = serde_json::Value::Array(Vec::new());
|
||||||
@@ -420,6 +478,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
Ok(DatasetRecordBatchStream::new(stream))
|
Ok(DatasetRecordBatchStream::new(stream))
|
||||||
}
|
}
|
||||||
async fn update(&self, update: UpdateBuilder) -> Result<u64> {
|
async fn update(&self, update: UpdateBuilder) -> Result<u64> {
|
||||||
|
self.check_mutable().await?;
|
||||||
let request = self
|
let request = self
|
||||||
.client
|
.client
|
||||||
.post(&format!("/v1/table/{}/update/", self.name));
|
.post(&format!("/v1/table/{}/update/", self.name));
|
||||||
@@ -441,6 +500,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
Ok(0) // TODO: support returning number of modified rows once supported in SaaS.
|
Ok(0) // TODO: support returning number of modified rows once supported in SaaS.
|
||||||
}
|
}
|
||||||
async fn delete(&self, predicate: &str) -> Result<()> {
|
async fn delete(&self, predicate: &str) -> Result<()> {
|
||||||
|
self.check_mutable().await?;
|
||||||
let body = serde_json::json!({ "predicate": predicate });
|
let body = serde_json::json!({ "predicate": predicate });
|
||||||
let request = self
|
let request = self
|
||||||
.client
|
.client
|
||||||
@@ -452,6 +512,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn create_index(&self, mut index: IndexBuilder) -> Result<()> {
|
async fn create_index(&self, mut index: IndexBuilder) -> Result<()> {
|
||||||
|
self.check_mutable().await?;
|
||||||
let request = self
|
let request = self
|
||||||
.client
|
.client
|
||||||
.post(&format!("/v1/table/{}/create_index/", self.name));
|
.post(&format!("/v1/table/{}/create_index/", self.name));
|
||||||
@@ -530,6 +591,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
params: MergeInsertBuilder,
|
params: MergeInsertBuilder,
|
||||||
new_data: Box<dyn RecordBatchReader + Send>,
|
new_data: Box<dyn RecordBatchReader + Send>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
self.check_mutable().await?;
|
||||||
let query = MergeInsertRequest::try_from(params)?;
|
let query = MergeInsertRequest::try_from(params)?;
|
||||||
let body = Self::reader_as_body(new_data)?;
|
let body = Self::reader_as_body(new_data)?;
|
||||||
let request = self
|
let request = self
|
||||||
@@ -546,6 +608,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
async fn optimize(&self, _action: OptimizeAction) -> Result<OptimizeStats> {
|
async fn optimize(&self, _action: OptimizeAction) -> Result<OptimizeStats> {
|
||||||
|
self.check_mutable().await?;
|
||||||
Err(Error::NotSupported {
|
Err(Error::NotSupported {
|
||||||
message: "optimize is not supported on LanceDB cloud.".into(),
|
message: "optimize is not supported on LanceDB cloud.".into(),
|
||||||
})
|
})
|
||||||
@@ -555,16 +618,19 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
_transforms: NewColumnTransform,
|
_transforms: NewColumnTransform,
|
||||||
_read_columns: Option<Vec<String>>,
|
_read_columns: Option<Vec<String>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
self.check_mutable().await?;
|
||||||
Err(Error::NotSupported {
|
Err(Error::NotSupported {
|
||||||
message: "add_columns is not yet supported.".into(),
|
message: "add_columns is not yet supported.".into(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
async fn alter_columns(&self, _alterations: &[ColumnAlteration]) -> Result<()> {
|
async fn alter_columns(&self, _alterations: &[ColumnAlteration]) -> Result<()> {
|
||||||
|
self.check_mutable().await?;
|
||||||
Err(Error::NotSupported {
|
Err(Error::NotSupported {
|
||||||
message: "alter_columns is not yet supported.".into(),
|
message: "alter_columns is not yet supported.".into(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
async fn drop_columns(&self, _columns: &[&str]) -> Result<()> {
|
async fn drop_columns(&self, _columns: &[&str]) -> Result<()> {
|
||||||
|
self.check_mutable().await?;
|
||||||
Err(Error::NotSupported {
|
Err(Error::NotSupported {
|
||||||
message: "drop_columns is not yet supported.".into(),
|
message: "drop_columns is not yet supported.".into(),
|
||||||
})
|
})
|
||||||
@@ -572,9 +638,13 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
|
|
||||||
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
|
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
|
||||||
// Make request to list the indices
|
// Make request to list the indices
|
||||||
let request = self
|
let mut request = self
|
||||||
.client
|
.client
|
||||||
.post(&format!("/v1/table/{}/index/list/", self.name));
|
.post(&format!("/v1/table/{}/index/list/", self.name));
|
||||||
|
let version = self.current_version().await;
|
||||||
|
let body = serde_json::json!({ "version": version });
|
||||||
|
request = request.json(&body);
|
||||||
|
|
||||||
let (request_id, response) = self.client.send(request, true).await?;
|
let (request_id, response) = self.client.send(request, true).await?;
|
||||||
let response = self.check_table_response(&request_id, response).await?;
|
let response = self.check_table_response(&request_id, response).await?;
|
||||||
|
|
||||||
@@ -624,10 +694,14 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
|
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
|
||||||
let request = self.client.post(&format!(
|
let mut request = self.client.post(&format!(
|
||||||
"/v1/table/{}/index/{}/stats/",
|
"/v1/table/{}/index/{}/stats/",
|
||||||
self.name, index_name
|
self.name, index_name
|
||||||
));
|
));
|
||||||
|
let version = self.current_version().await;
|
||||||
|
let body = serde_json::json!({ "version": version });
|
||||||
|
request = request.json(&body);
|
||||||
|
|
||||||
let (request_id, response) = self.client.send(request, true).await?;
|
let (request_id, response) = self.client.send(request, true).await?;
|
||||||
|
|
||||||
if response.status() == StatusCode::NOT_FOUND {
|
if response.status() == StatusCode::NOT_FOUND {
|
||||||
@@ -805,7 +879,10 @@ mod tests {
|
|||||||
request.headers().get("Content-Type").unwrap(),
|
request.headers().get("Content-Type").unwrap(),
|
||||||
JSON_CONTENT_TYPE
|
JSON_CONTENT_TYPE
|
||||||
);
|
);
|
||||||
assert_eq!(request.body().unwrap().as_bytes().unwrap(), br#"{}"#);
|
assert_eq!(
|
||||||
|
request.body().unwrap().as_bytes().unwrap(),
|
||||||
|
br#"{"version":null}"#
|
||||||
|
);
|
||||||
|
|
||||||
http::Response::builder().status(200).body("42").unwrap()
|
http::Response::builder().status(200).body("42").unwrap()
|
||||||
});
|
});
|
||||||
@@ -822,7 +899,7 @@ mod tests {
|
|||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
request.body().unwrap().as_bytes().unwrap(),
|
request.body().unwrap().as_bytes().unwrap(),
|
||||||
br#"{"predicate":"a > 10"}"#
|
br#"{"predicate":"a > 10","version":null}"#
|
||||||
);
|
);
|
||||||
|
|
||||||
http::Response::builder().status(200).body("42").unwrap()
|
http::Response::builder().status(200).body("42").unwrap()
|
||||||
@@ -1121,7 +1198,9 @@ mod tests {
|
|||||||
"prefilter": true,
|
"prefilter": true,
|
||||||
"distance_type": "l2",
|
"distance_type": "l2",
|
||||||
"nprobes": 20,
|
"nprobes": 20,
|
||||||
|
"ef": Option::<usize>::None,
|
||||||
"refine_factor": null,
|
"refine_factor": null,
|
||||||
|
"version": null,
|
||||||
});
|
});
|
||||||
// Pass vector separately to make sure it matches f32 precision.
|
// Pass vector separately to make sure it matches f32 precision.
|
||||||
expected_body["vector"] = vec![0.1f32, 0.2, 0.3].into();
|
expected_body["vector"] = vec![0.1f32, 0.2, 0.3].into();
|
||||||
@@ -1166,7 +1245,9 @@ mod tests {
|
|||||||
"bypass_vector_index": true,
|
"bypass_vector_index": true,
|
||||||
"columns": ["a", "b"],
|
"columns": ["a", "b"],
|
||||||
"nprobes": 12,
|
"nprobes": 12,
|
||||||
|
"ef": Option::<usize>::None,
|
||||||
"refine_factor": 2,
|
"refine_factor": 2,
|
||||||
|
"version": null,
|
||||||
});
|
});
|
||||||
// Pass vector separately to make sure it matches f32 precision.
|
// Pass vector separately to make sure it matches f32 precision.
|
||||||
expected_body["vector"] = vec![0.1f32, 0.2, 0.3].into();
|
expected_body["vector"] = vec![0.1f32, 0.2, 0.3].into();
|
||||||
@@ -1222,6 +1303,7 @@ mod tests {
|
|||||||
"k": 10,
|
"k": 10,
|
||||||
"vector": [],
|
"vector": [],
|
||||||
"with_row_id": true,
|
"with_row_id": true,
|
||||||
|
"version": null
|
||||||
});
|
});
|
||||||
assert_eq!(body, expected_body);
|
assert_eq!(body, expected_body);
|
||||||
|
|
||||||
@@ -1451,4 +1533,195 @@ mod tests {
|
|||||||
let indices = table.index_stats("my_index").await.unwrap();
|
let indices = table.index_stats("my_index").await.unwrap();
|
||||||
assert!(indices.is_none());
|
assert!(indices.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_passes_version() {
|
||||||
|
let table = Table::new_with_handler("my_table", |request| {
|
||||||
|
let body = request.body().unwrap().as_bytes().unwrap();
|
||||||
|
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||||
|
let version = body
|
||||||
|
.as_object()
|
||||||
|
.unwrap()
|
||||||
|
.get("version")
|
||||||
|
.unwrap()
|
||||||
|
.as_u64()
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(version, 42);
|
||||||
|
|
||||||
|
let response_body = match request.url().path() {
|
||||||
|
"/v1/table/my_table/describe/" => {
|
||||||
|
serde_json::json!({
|
||||||
|
"version": 42,
|
||||||
|
"schema": { "fields": [] }
|
||||||
|
})
|
||||||
|
}
|
||||||
|
"/v1/table/my_table/index/list/" => {
|
||||||
|
serde_json::json!({
|
||||||
|
"indexes": []
|
||||||
|
})
|
||||||
|
}
|
||||||
|
"/v1/table/my_table/index/my_idx/stats/" => {
|
||||||
|
serde_json::json!({
|
||||||
|
"num_indexed_rows": 100000,
|
||||||
|
"num_unindexed_rows": 0,
|
||||||
|
"index_type": "IVF_PQ",
|
||||||
|
"distance_type": "l2"
|
||||||
|
})
|
||||||
|
}
|
||||||
|
"/v1/table/my_table/count_rows/" => {
|
||||||
|
serde_json::json!(1000)
|
||||||
|
}
|
||||||
|
"/v1/table/my_table/query/" => {
|
||||||
|
let expected_data = RecordBatch::try_new(
|
||||||
|
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||||
|
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let expected_data_ref = expected_data.clone();
|
||||||
|
let response_body = write_ipc_file(&expected_data_ref);
|
||||||
|
return http::Response::builder()
|
||||||
|
.status(200)
|
||||||
|
.header(CONTENT_TYPE, ARROW_FILE_CONTENT_TYPE)
|
||||||
|
.body(response_body)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
path => panic!("Unexpected path: {}", path),
|
||||||
|
};
|
||||||
|
|
||||||
|
http::Response::builder()
|
||||||
|
.status(200)
|
||||||
|
.body(
|
||||||
|
serde_json::to_string(&response_body)
|
||||||
|
.unwrap()
|
||||||
|
.as_bytes()
|
||||||
|
.to_vec(),
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
table.checkout(42).await.unwrap();
|
||||||
|
|
||||||
|
// ensure that version is passed to the /describe endpoint
|
||||||
|
let version = table.version().await.unwrap();
|
||||||
|
assert_eq!(version, 42);
|
||||||
|
|
||||||
|
// ensure it's passed to other read API calls
|
||||||
|
table.list_indices().await.unwrap();
|
||||||
|
table.index_stats("my_idx").await.unwrap();
|
||||||
|
table.count_rows(None).await.unwrap();
|
||||||
|
table
|
||||||
|
.query()
|
||||||
|
.nearest_to(vec![0.1, 0.2, 0.3])
|
||||||
|
.unwrap()
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_fails_if_checkout_version_doesnt_exist() {
|
||||||
|
let table = Table::new_with_handler("my_table", |request| {
|
||||||
|
let body = request.body().unwrap().as_bytes().unwrap();
|
||||||
|
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||||
|
let version = body
|
||||||
|
.as_object()
|
||||||
|
.unwrap()
|
||||||
|
.get("version")
|
||||||
|
.unwrap()
|
||||||
|
.as_u64()
|
||||||
|
.unwrap();
|
||||||
|
if version != 42 {
|
||||||
|
return http::Response::builder()
|
||||||
|
.status(404)
|
||||||
|
.body(format!("Table my_table (version: {}) not found", version))
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let response_body = match request.url().path() {
|
||||||
|
"/v1/table/my_table/describe/" => {
|
||||||
|
serde_json::json!({
|
||||||
|
"version": 42,
|
||||||
|
"schema": { "fields": [] }
|
||||||
|
})
|
||||||
|
}
|
||||||
|
_ => panic!("Unexpected path"),
|
||||||
|
};
|
||||||
|
|
||||||
|
http::Response::builder()
|
||||||
|
.status(200)
|
||||||
|
.body(serde_json::to_string(&response_body).unwrap())
|
||||||
|
.unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
let res = table.checkout(43).await;
|
||||||
|
println!("{:?}", res);
|
||||||
|
assert!(
|
||||||
|
matches!(res, Err(Error::TableNotFound { name }) if name == "my_table (version: 43)")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_timetravel_immutable() {
|
||||||
|
let table = Table::new_with_handler::<String>("my_table", |request| {
|
||||||
|
let response_body = match request.url().path() {
|
||||||
|
"/v1/table/my_table/describe/" => {
|
||||||
|
serde_json::json!({
|
||||||
|
"version": 42,
|
||||||
|
"schema": { "fields": [] }
|
||||||
|
})
|
||||||
|
}
|
||||||
|
_ => panic!("Should not have made a request: {:?}", request),
|
||||||
|
};
|
||||||
|
|
||||||
|
http::Response::builder()
|
||||||
|
.status(200)
|
||||||
|
.body(serde_json::to_string(&response_body).unwrap())
|
||||||
|
.unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
table.checkout(42).await.unwrap();
|
||||||
|
|
||||||
|
// Ensure that all mutable operations fail.
|
||||||
|
let res = table
|
||||||
|
.update()
|
||||||
|
.column("a", "a + 1")
|
||||||
|
.column("b", "b - 1")
|
||||||
|
.only_if("b > 10")
|
||||||
|
.execute()
|
||||||
|
.await;
|
||||||
|
assert!(matches!(res, Err(Error::NotSupported { .. })));
|
||||||
|
|
||||||
|
let batch = RecordBatch::try_new(
|
||||||
|
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||||
|
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let data = Box::new(RecordBatchIterator::new(
|
||||||
|
[Ok(batch.clone())],
|
||||||
|
batch.schema(),
|
||||||
|
));
|
||||||
|
let res = table.merge_insert(&["some_col"]).execute(data).await;
|
||||||
|
assert!(matches!(res, Err(Error::NotSupported { .. })));
|
||||||
|
|
||||||
|
let res = table.delete("id in (1, 2, 3)").await;
|
||||||
|
assert!(matches!(res, Err(Error::NotSupported { .. })));
|
||||||
|
|
||||||
|
let data = RecordBatch::try_new(
|
||||||
|
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||||
|
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let res = table
|
||||||
|
.add(RecordBatchIterator::new([Ok(data.clone())], data.schema()))
|
||||||
|
.execute()
|
||||||
|
.await;
|
||||||
|
assert!(matches!(res, Err(Error::NotSupported { .. })));
|
||||||
|
|
||||||
|
let res = table
|
||||||
|
.create_index(&["a"], Index::IvfPq(Default::default()))
|
||||||
|
.execute()
|
||||||
|
.await;
|
||||||
|
assert!(matches!(res, Err(Error::NotSupported { .. })));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1904,6 +1904,9 @@ impl TableInternal for NativeTable {
|
|||||||
query.base.offset.map(|offset| offset as i64),
|
query.base.offset.map(|offset| offset as i64),
|
||||||
)?;
|
)?;
|
||||||
scanner.nprobs(query.nprobes);
|
scanner.nprobs(query.nprobes);
|
||||||
|
if let Some(ef) = query.ef {
|
||||||
|
scanner.ef(ef);
|
||||||
|
}
|
||||||
scanner.use_index(query.use_index);
|
scanner.use_index(query.use_index);
|
||||||
scanner.prefilter(query.base.prefilter);
|
scanner.prefilter(query.base.prefilter);
|
||||||
match query.base.select {
|
match query.base.select {
|
||||||
|
|||||||
Reference in New Issue
Block a user