mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 06:39:57 +00:00
Compare commits
12 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b06e214d29 | ||
|
|
c1f8feb6ed | ||
|
|
cada35d5b7 | ||
|
|
2d25c263e9 | ||
|
|
bcd7f66dc7 | ||
|
|
1daecac648 | ||
|
|
b8e656b2a7 | ||
|
|
ff7c1193a7 | ||
|
|
6d70e7c29b | ||
|
|
73cc12ecc5 | ||
|
|
6036cf48a7 | ||
|
|
15f4787cc8 |
@@ -1,5 +1,5 @@
|
|||||||
[bumpversion]
|
[bumpversion]
|
||||||
current_version = 0.1.18
|
current_version = 0.1.19
|
||||||
commit = True
|
commit = True
|
||||||
message = Bump version: {current_version} → {new_version}
|
message = Bump version: {current_version} → {new_version}
|
||||||
tag = True
|
tag = True
|
||||||
|
|||||||
4
.github/workflows/python.yml
vendored
4
.github/workflows/python.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
|||||||
python-version: 3.${{ matrix.python-minor-version }}
|
python-version: 3.${{ matrix.python-minor-version }}
|
||||||
- name: Install lancedb
|
- name: Install lancedb
|
||||||
run: |
|
run: |
|
||||||
pip install -e .
|
pip install -e .[tests]
|
||||||
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
||||||
pip install pytest pytest-mock black isort
|
pip install pytest pytest-mock black isort
|
||||||
- name: Black
|
- name: Black
|
||||||
@@ -59,7 +59,7 @@ jobs:
|
|||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
- name: Install lancedb
|
- name: Install lancedb
|
||||||
run: |
|
run: |
|
||||||
pip install -e .
|
pip install -e .[tests]
|
||||||
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
|
||||||
pip install pytest pytest-mock black
|
pip install pytest pytest-mock black
|
||||||
- name: Black
|
- name: Black
|
||||||
|
|||||||
@@ -13,4 +13,5 @@ arrow-schema = "42.0"
|
|||||||
arrow-ipc = "42.0"
|
arrow-ipc = "42.0"
|
||||||
half = { "version" = "=2.2.1", default-features = false }
|
half = { "version" = "=2.2.1", default-features = false }
|
||||||
object_store = "0.6.1"
|
object_store = "0.6.1"
|
||||||
|
snafu = "0.7.4"
|
||||||
|
|
||||||
|
|||||||
@@ -57,12 +57,14 @@ nav:
|
|||||||
- Basics: basic.md
|
- Basics: basic.md
|
||||||
- Embeddings: embedding.md
|
- Embeddings: embedding.md
|
||||||
- Python full-text search: fts.md
|
- Python full-text search: fts.md
|
||||||
- Python integrations:
|
- Integrations:
|
||||||
- Pandas and PyArrow: python/arrow.md
|
- Pandas and PyArrow: python/arrow.md
|
||||||
- DuckDB: python/duckdb.md
|
- DuckDB: python/duckdb.md
|
||||||
- LangChain 🦜️🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
|
- LangChain 🦜️🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
|
||||||
|
- LangChain JS/TS 🦜️🔗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb
|
||||||
- LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
|
- LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
|
||||||
- Pydantic: python/pydantic.md
|
- Pydantic: python/pydantic.md
|
||||||
|
- Voxel51: integrations/voxel51.md
|
||||||
- Python examples:
|
- Python examples:
|
||||||
- YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
|
- YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
|
||||||
- Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
|
- Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
|
||||||
@@ -72,6 +74,7 @@ nav:
|
|||||||
- Javascript examples:
|
- Javascript examples:
|
||||||
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
|
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
|
||||||
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
|
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
|
||||||
|
|
||||||
- References:
|
- References:
|
||||||
- Vector Search: search.md
|
- Vector Search: search.md
|
||||||
- SQL filters: sql.md
|
- SQL filters: sql.md
|
||||||
|
|||||||
BIN
docs/src/assets/voxel.gif
Normal file
BIN
docs/src/assets/voxel.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 953 KiB |
@@ -4,4 +4,10 @@
|
|||||||
|
|
||||||
<img id="splash" width="400" alt="youtube transcript search" src="https://user-images.githubusercontent.com/917119/236965568-def7394d-171c-45f2-939d-8edfeaadd88c.png">
|
<img id="splash" width="400" alt="youtube transcript search" src="https://user-images.githubusercontent.com/917119/236965568-def7394d-171c-45f2-939d-8edfeaadd88c.png">
|
||||||
|
|
||||||
|
|
||||||
|
<a href="https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/youtube_bot/main.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
|
||||||
|
|
||||||
|
Scripts - [Python](./examples/youtube_bot/main.py) [JavaScript](./examples/youtube_bot/index.js)
|
||||||
|
|
||||||
|
|
||||||
This example is in a [notebook](https://github.com/lancedb/lancedb/blob/main/docs/src/notebooks/youtube_transcript_search.ipynb)
|
This example is in a [notebook](https://github.com/lancedb/lancedb/blob/main/docs/src/notebooks/youtube_transcript_search.ipynb)
|
||||||
|
|||||||
71
docs/src/integrations/voxel51.md
Normal file
71
docs/src/integrations/voxel51.md
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|

|
||||||
|
|
||||||
|
Basic recipe
|
||||||
|
____________
|
||||||
|
|
||||||
|
The basic workflow to use LanceDB to create a similarity index on your FiftyOne
|
||||||
|
datasets and use this to query your data is as follows:
|
||||||
|
|
||||||
|
1) Load a dataset into FiftyOne
|
||||||
|
|
||||||
|
2) Compute embedding vectors for samples or patches in your dataset, or select
|
||||||
|
a model to use to generate embeddings
|
||||||
|
|
||||||
|
3) Use the `compute_similarity()`
|
||||||
|
method to generate a LanceDB table for the samples or object
|
||||||
|
patches embeddings in a dataset by setting the parameter `backend="lancedb"` and
|
||||||
|
specifying a `brain_key` of your choice
|
||||||
|
|
||||||
|
4) Use this LanceDB table to query your data with
|
||||||
|
`sort_by_similarity()`
|
||||||
|
|
||||||
|
5) If desired, delete the table
|
||||||
|
|
||||||
|
The example below demonstrates this workflow.
|
||||||
|
|
||||||
|
!!! Note
|
||||||
|
|
||||||
|
You must install the LanceDB Python client to run this
|
||||||
|
```
|
||||||
|
pip install lancedb
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
|
||||||
|
import fiftyone as fo
|
||||||
|
import fiftyone.brain as fob
|
||||||
|
import fiftyone.zoo as foz
|
||||||
|
|
||||||
|
# Step 1: Load your data into FiftyOne
|
||||||
|
dataset = foz.load_zoo_dataset("quickstart")
|
||||||
|
|
||||||
|
# Steps 2 and 3: Compute embeddings and create a similarity index
|
||||||
|
lancedb_index = fob.compute_similarity(
|
||||||
|
dataset,
|
||||||
|
model="clip-vit-base32-torch",
|
||||||
|
brain_key="lancedb_index",
|
||||||
|
backend="lancedb",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
Once the similarity index has been generated, we can query our data in FiftyOne
|
||||||
|
by specifying the `brain_key`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Step 4: Query your data
|
||||||
|
query = dataset.first().id # query by sample ID
|
||||||
|
view = dataset.sort_by_similarity(
|
||||||
|
query,
|
||||||
|
brain_key="lancedb_index",
|
||||||
|
k=10, # limit to 10 most similar samples
|
||||||
|
)
|
||||||
|
|
||||||
|
# Step 5 (optional): Cleanup
|
||||||
|
|
||||||
|
# Delete the LanceDB table
|
||||||
|
lancedb_index.cleanup()
|
||||||
|
|
||||||
|
# Delete run record from FiftyOne
|
||||||
|
dataset.delete_brain_run("lancedb_index")
|
||||||
|
```
|
||||||
|
|
||||||
|
For a more in-depth walkthrough of the integration, visit the LanceDB guide on Voxel51 - [LanceDB x Voxel51](https://docs.voxel51.com/integrations/lancedb.html)
|
||||||
@@ -10,7 +10,11 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"This Q&A bot will allow you to query your own documentation easily using questions. We'll also demonstrate the use of LangChain and LanceDB using the OpenAI API. \n",
|
"This Q&A bot will allow you to query your own documentation easily using questions. We'll also demonstrate the use of LangChain and LanceDB using the OpenAI API. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"In this example we'll use Pandas 2.0 documentation, but, this could be replaced for your own docs as well"
|
"In this example we'll use Pandas 2.0 documentation, but, this could be replaced for your own docs as well\n",
|
||||||
|
"\n",
|
||||||
|
"<a href=\"https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/Code-Documentation-QA-Bot/main.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
|
||||||
|
"\n",
|
||||||
|
"Scripts - [](./examples/Code-Documentation-QA-Bot/main.py) [](./examples/Code-Documentation-QA-Bot/index.js)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,5 +1,14 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" <a href=\"https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/multimodal_clip/main.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>| [](./examples/multimodal_clip/main.py) |"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 2,
|
||||||
@@ -42,6 +51,19 @@
|
|||||||
"## First run setup: Download data and pre-process"
|
"## First run setup: Download data and pre-process"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"### Get dataset\n",
|
||||||
|
"\n",
|
||||||
|
"!wget https://eto-public.s3.us-west-2.amazonaws.com/datasets/diffusiondb_lance.tar.gz\n",
|
||||||
|
"!tar -xvf diffusiondb_lance.tar.gz\n",
|
||||||
|
"!mv diffusiondb_test rawdata.lance\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 30,
|
||||||
@@ -247,7 +269,7 @@
|
|||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3.11.4 64-bit",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@@ -261,7 +283,12 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.11.3"
|
"version": "3.11.4"
|
||||||
|
},
|
||||||
|
"vscode": {
|
||||||
|
"interpreter": {
|
||||||
|
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -8,7 +8,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Youtube Transcript Search QA Bot\n",
|
"# Youtube Transcript Search QA Bot\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This Q&A bot will allow you to search through youtube transcripts using natural language! By going through this notebook, we'll introduce how you can use LanceDB to store and manage your data easily."
|
"This Q&A bot will allow you to search through youtube transcripts using natural language! By going through this notebook, we'll introduce how you can use LanceDB to store and manage your data easily.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"<a href=\"https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/youtube_bot/main.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\">\n",
|
||||||
|
"\n",
|
||||||
|
"Scripts - [](./examples/youtube_bot/main.py) [](./examples/youtube_bot/index.js)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
# Pydantic
|
# Pydantic
|
||||||
|
|
||||||
[Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
|
[Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
|
||||||
|
LanceDB integrates with Pydantic for schema inference, data ingestion, and query result casting.
|
||||||
|
|
||||||
|
|
||||||
## Schema
|
## Schema
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,8 @@ excluded_files = [
|
|||||||
"../src/embedding.md",
|
"../src/embedding.md",
|
||||||
"../src/examples/serverless_lancedb_with_s3_and_lambda.md",
|
"../src/examples/serverless_lancedb_with_s3_and_lambda.md",
|
||||||
"../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
|
"../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
|
||||||
"../src/examples/youtube_transcript_bot_with_nodejs.md"
|
"../src/examples/youtube_transcript_bot_with_nodejs.md",
|
||||||
|
"../src/integrations/voxel51.md",
|
||||||
]
|
]
|
||||||
|
|
||||||
python_prefix = "py"
|
python_prefix = "py"
|
||||||
|
|||||||
74
node/package-lock.json
generated
74
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"lockfileVersion": 2,
|
"lockfileVersion": 2,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -51,11 +51,11 @@
|
|||||||
"typescript": "*"
|
"typescript": "*"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.1.18",
|
"@lancedb/vectordb-darwin-arm64": "0.1.19",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.1.18",
|
"@lancedb/vectordb-darwin-x64": "0.1.19",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.1.18",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.1.19",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.1.18",
|
"@lancedb/vectordb-linux-x64-gnu": "0.1.19",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.1.18"
|
"@lancedb/vectordb-win32-x64-msvc": "0.1.19"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@apache-arrow/ts": {
|
"node_modules/@apache-arrow/ts": {
|
||||||
@@ -315,9 +315,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.18.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.19.tgz",
|
||||||
"integrity": "sha512-vu8MCFgaAAGmTJF+4RaoApROMpRVVgrCk+V9my4adAfWkkXbSmtxiDgiIwwL1VqdGb8UwzGn3kVbNW7idE1ojA==",
|
"integrity": "sha512-efQhJkBKvMNhjFq3Sw3/qHo9D9gb9UqiIr98n3STsbNxBQjMnWemXn91Ckl40siRG1O8qXcINW7Qs/EGmus+kg==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -327,9 +327,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.18.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.19.tgz",
|
||||||
"integrity": "sha512-ZU30bd6frRyKJ515ow972PlqO2wIiNT4Ohor9+KbUwl/VKDyAwKOKG8cWhRJXTxk0k1oqpiJ6+Q28TcYJ0sSAw==",
|
"integrity": "sha512-r6OZNVyemAssABz2w7CRhe7dyREwBEfTytn+ux1zzTnzsgMgDovCQ0rQ3WZcxWvcy7SFCxiemA9IP1b/lsb4tQ==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -339,9 +339,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.18.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.19.tgz",
|
||||||
"integrity": "sha512-2UroC026bUYwyciSRonYlXei0SoYbKgfWpozxYOu7GgBAV2CQQtaAPgWJTEl6ZiCNeBmBTx+j0h3+ydUfZA73Q==",
|
"integrity": "sha512-mL/hRmZp6Kw7hmGJBdOZfp/tTYiCdlOcs8DA/+nr2eiXERv0gIhyiKvr2P5DwbBmut3qXEkDalMHTo95BSdL2A==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -351,9 +351,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.18.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.19.tgz",
|
||||||
"integrity": "sha512-DoQBskl22JAJFZh219ZOJ6o+f1niTZp0qRYngHa/kTIpLKzHWQ0OTtMCz32VBAjAsKjSLNxHE8rrT/S6tvS7KQ==",
|
"integrity": "sha512-AG0FHksbbr+cHVKPi4B8cmBtqb6T9E0uaK4kyZkXrX52/xtv9RYVZcykaB/tSSm0XNFPWWRnx9R8UqNZV/hxMA==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -363,9 +363,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.18.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.19.tgz",
|
||||||
"integrity": "sha512-a/kUM3V6rWuXS80pPECYxKfCUAnq56Of/GPCvnAkpk9C9ldyX10iff4aA6DiPHjEk9V2ytqDfJKl9N3QcMLKLA==",
|
"integrity": "sha512-PDWZ2hvLVXH4Z4WIO1rsWY8ev3NpNm7aXlaey32P+l1Iz9Hia9+F2GBpp2UiEQKfvbk82ucAvBLRmpSsHY8Tlw==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -4852,33 +4852,33 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"@lancedb/vectordb-darwin-arm64": {
|
"@lancedb/vectordb-darwin-arm64": {
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.18.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.19.tgz",
|
||||||
"integrity": "sha512-vu8MCFgaAAGmTJF+4RaoApROMpRVVgrCk+V9my4adAfWkkXbSmtxiDgiIwwL1VqdGb8UwzGn3kVbNW7idE1ojA==",
|
"integrity": "sha512-efQhJkBKvMNhjFq3Sw3/qHo9D9gb9UqiIr98n3STsbNxBQjMnWemXn91Ckl40siRG1O8qXcINW7Qs/EGmus+kg==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"@lancedb/vectordb-darwin-x64": {
|
"@lancedb/vectordb-darwin-x64": {
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.18.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.19.tgz",
|
||||||
"integrity": "sha512-ZU30bd6frRyKJ515ow972PlqO2wIiNT4Ohor9+KbUwl/VKDyAwKOKG8cWhRJXTxk0k1oqpiJ6+Q28TcYJ0sSAw==",
|
"integrity": "sha512-r6OZNVyemAssABz2w7CRhe7dyREwBEfTytn+ux1zzTnzsgMgDovCQ0rQ3WZcxWvcy7SFCxiemA9IP1b/lsb4tQ==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": {
|
"@lancedb/vectordb-linux-arm64-gnu": {
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.18.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.19.tgz",
|
||||||
"integrity": "sha512-2UroC026bUYwyciSRonYlXei0SoYbKgfWpozxYOu7GgBAV2CQQtaAPgWJTEl6ZiCNeBmBTx+j0h3+ydUfZA73Q==",
|
"integrity": "sha512-mL/hRmZp6Kw7hmGJBdOZfp/tTYiCdlOcs8DA/+nr2eiXERv0gIhyiKvr2P5DwbBmut3qXEkDalMHTo95BSdL2A==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"@lancedb/vectordb-linux-x64-gnu": {
|
"@lancedb/vectordb-linux-x64-gnu": {
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.18.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.19.tgz",
|
||||||
"integrity": "sha512-DoQBskl22JAJFZh219ZOJ6o+f1niTZp0qRYngHa/kTIpLKzHWQ0OTtMCz32VBAjAsKjSLNxHE8rrT/S6tvS7KQ==",
|
"integrity": "sha512-AG0FHksbbr+cHVKPi4B8cmBtqb6T9E0uaK4kyZkXrX52/xtv9RYVZcykaB/tSSm0XNFPWWRnx9R8UqNZV/hxMA==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"@lancedb/vectordb-win32-x64-msvc": {
|
"@lancedb/vectordb-win32-x64-msvc": {
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.18.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.19.tgz",
|
||||||
"integrity": "sha512-a/kUM3V6rWuXS80pPECYxKfCUAnq56Of/GPCvnAkpk9C9ldyX10iff4aA6DiPHjEk9V2ytqDfJKl9N3QcMLKLA==",
|
"integrity": "sha512-PDWZ2hvLVXH4Z4WIO1rsWY8ev3NpNm7aXlaey32P+l1Iz9Hia9+F2GBpp2UiEQKfvbk82ucAvBLRmpSsHY8Tlw==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"@neon-rs/cli": {
|
"@neon-rs/cli": {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.1.18",
|
"version": "0.1.19",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
@@ -78,10 +78,10 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.1.18",
|
"@lancedb/vectordb-darwin-arm64": "0.1.19",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.1.18",
|
"@lancedb/vectordb-darwin-x64": "0.1.19",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.1.18",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.1.19",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.1.18",
|
"@lancedb/vectordb-linux-x64-gnu": "0.1.19",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.1.18"
|
"@lancedb/vectordb-win32-x64-msvc": "0.1.19"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,7 +28,6 @@ export interface EmbeddingFunction<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
|
export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
|
||||||
return Object.keys(value).length === 2 &&
|
return typeof value.sourceColumn === 'string' &&
|
||||||
typeof value.sourceColumn === 'string' &&
|
|
||||||
typeof value.embed === 'function'
|
typeof value.embed === 'function'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import { describe } from 'mocha'
|
|||||||
import { assert } from 'chai'
|
import { assert } from 'chai'
|
||||||
|
|
||||||
import { OpenAIEmbeddingFunction } from '../../embedding/openai'
|
import { OpenAIEmbeddingFunction } from '../../embedding/openai'
|
||||||
|
import { isEmbeddingFunction } from '../../embedding/embedding_function'
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||||
const { OpenAIApi } = require('openai')
|
const { OpenAIApi } = require('openai')
|
||||||
@@ -47,4 +48,10 @@ describe('OpenAPIEmbeddings', function () {
|
|||||||
assert.deepEqual(vectors[1], stubValue.data.data[1].embedding)
|
assert.deepEqual(vectors[1], stubValue.data.data[1].embedding)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('isEmbeddingFunction', function () {
|
||||||
|
it('should match the isEmbeddingFunction guard', function () {
|
||||||
|
assert.isTrue(isEmbeddingFunction(new OpenAIEmbeddingFunction('text', 'sk-key')))
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -134,6 +134,18 @@ describe('LanceDB client', function () {
|
|||||||
assert.equal(await table.countRows(), 2)
|
assert.equal(await table.countRows(), 2)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('fails to create a new table when the vector column is missing', async function () {
|
||||||
|
const dir = await track().mkdir('lancejs')
|
||||||
|
const con = await lancedb.connect(dir)
|
||||||
|
|
||||||
|
const data = [
|
||||||
|
{ id: 1, price: 10 }
|
||||||
|
]
|
||||||
|
|
||||||
|
const create = con.createTable('missing_vector', data)
|
||||||
|
await expect(create).to.be.rejectedWith(Error, 'column \'vector\' is missing')
|
||||||
|
})
|
||||||
|
|
||||||
it('use overwrite flag to overwrite existing table', async function () {
|
it('use overwrite flag to overwrite existing table', async function () {
|
||||||
const dir = await track().mkdir('lancejs')
|
const dir = await track().mkdir('lancejs')
|
||||||
const con = await lancedb.connect(dir)
|
const con = await lancedb.connect(dir)
|
||||||
@@ -230,6 +242,22 @@ describe('LanceDB client', function () {
|
|||||||
// Default replace = true
|
// Default replace = true
|
||||||
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
|
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
|
||||||
}).timeout(50_000)
|
}).timeout(50_000)
|
||||||
|
|
||||||
|
it('it should fail when the column is not a vector', async function () {
|
||||||
|
const uri = await createTestDB(32, 300)
|
||||||
|
const con = await lancedb.connect(uri)
|
||||||
|
const table = await con.openTable('vectors')
|
||||||
|
const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
|
||||||
|
await expect(createIndex).to.be.rejectedWith(/VectorIndex requires the column data type to be fixed size list of float32s/)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('it should fail when the column is not a vector', async function () {
|
||||||
|
const uri = await createTestDB(32, 300)
|
||||||
|
const con = await lancedb.connect(uri)
|
||||||
|
const table = await con.openTable('vectors')
|
||||||
|
const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: -1, max_iters: 2, num_sub_vectors: 2 })
|
||||||
|
await expect(createIndex).to.be.rejectedWith('num_partitions: must be > 0')
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('when using a custom embedding function', function () {
|
describe('when using a custom embedding function', function () {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[bumpversion]
|
[bumpversion]
|
||||||
current_version = 0.1.15
|
current_version = 0.1.16
|
||||||
commit = True
|
commit = True
|
||||||
message = [python] Bump version: {current_version} → {new_version}
|
message = [python] Bump version: {current_version} → {new_version}
|
||||||
tag = True
|
tag = True
|
||||||
|
|||||||
@@ -11,17 +11,18 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Union
|
from typing import Iterable, List, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
|
|
||||||
|
from .util import safe_import_pandas
|
||||||
|
|
||||||
|
pd = safe_import_pandas()
|
||||||
|
|
||||||
|
DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
|
||||||
VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
|
VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
|
||||||
URI = Union[str, Path]
|
URI = Union[str, Path]
|
||||||
|
|
||||||
# TODO support generator
|
|
||||||
DATA = Union[List[dict], dict, pd.DataFrame]
|
|
||||||
VECTOR_COLUMN_NAME = "vector"
|
VECTOR_COLUMN_NAME = "vector"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -12,12 +12,13 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from .exceptions import MissingColumnError, MissingValueError
|
from .exceptions import MissingColumnError, MissingValueError
|
||||||
|
from .util import safe_import_pandas
|
||||||
|
|
||||||
|
pd = safe_import_pandas()
|
||||||
|
|
||||||
|
|
||||||
def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
|
def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
|
||||||
"""Create a Contextualizer object for the given DataFrame.
|
"""Create a Contextualizer object for the given DataFrame.
|
||||||
|
|
||||||
Used to create context windows. Context windows are rolling subsets of text
|
Used to create context windows. Context windows are rolling subsets of text
|
||||||
@@ -175,8 +176,12 @@ class Contextualizer:
|
|||||||
self._min_window_size = min_window_size
|
self._min_window_size = min_window_size
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def to_df(self) -> pd.DataFrame:
|
def to_df(self) -> "pd.DataFrame":
|
||||||
"""Create the context windows and return a DataFrame."""
|
"""Create the context windows and return a DataFrame."""
|
||||||
|
if pd is None:
|
||||||
|
raise ImportError(
|
||||||
|
"pandas is required to create context windows using lancedb"
|
||||||
|
)
|
||||||
|
|
||||||
if self._text_col not in self._raw_df.columns.tolist():
|
if self._text_col not in self._raw_df.columns.tolist():
|
||||||
raise MissingColumnError(self._text_col)
|
raise MissingColumnError(self._text_col)
|
||||||
|
|||||||
@@ -16,9 +16,8 @@ from __future__ import annotations
|
|||||||
import os
|
import os
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
from typing import Optional
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
from pyarrow import fs
|
from pyarrow import fs
|
||||||
|
|
||||||
@@ -39,9 +38,7 @@ class DBConnection(ABC):
|
|||||||
def create_table(
|
def create_table(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
data: Optional[
|
data: Optional[DATA] = None,
|
||||||
Union[List[dict], dict, pd.DataFrame, pa.Table, Iterable[pa.RecordBatch]],
|
|
||||||
] = None,
|
|
||||||
schema: Optional[pa.Schema] = None,
|
schema: Optional[pa.Schema] = None,
|
||||||
mode: str = "create",
|
mode: str = "create",
|
||||||
on_bad_vectors: str = "error",
|
on_bad_vectors: str = "error",
|
||||||
@@ -279,7 +276,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
def create_table(
|
def create_table(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
data: Optional[Union[List[dict], dict, pd.DataFrame]] = None,
|
data: Optional[DATA] = None,
|
||||||
schema: pa.Schema = None,
|
schema: pa.Schema = None,
|
||||||
mode: str = "create",
|
mode: str = "create",
|
||||||
on_bad_vectors: str = "error",
|
on_bad_vectors: str = "error",
|
||||||
@@ -319,14 +316,20 @@ class LanceDBConnection(DBConnection):
|
|||||||
"""
|
"""
|
||||||
return LanceTable.open(self, name)
|
return LanceTable.open(self, name)
|
||||||
|
|
||||||
def drop_table(self, name: str):
|
def drop_table(self, name: str, ignore_missing: bool = False):
|
||||||
"""Drop a table from the database.
|
"""Drop a table from the database.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
|
ignore_missing: bool, default False
|
||||||
|
If True, ignore if the table does not exist.
|
||||||
"""
|
"""
|
||||||
filesystem, path = fs_from_uri(self.uri)
|
try:
|
||||||
table_path = os.path.join(path, name + ".lance")
|
filesystem, path = fs_from_uri(self.uri)
|
||||||
filesystem.delete_dir(table_path)
|
table_path = os.path.join(path, name + ".lance")
|
||||||
|
filesystem.delete_dir(table_path)
|
||||||
|
except FileNotFoundError:
|
||||||
|
if not ignore_missing:
|
||||||
|
raise
|
||||||
|
|||||||
@@ -16,15 +16,19 @@ import sys
|
|||||||
from typing import Callable, Union
|
from typing import Callable, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
from lance.vector import vec_to_table
|
from lance.vector import vec_to_table
|
||||||
from retry import retry
|
from retry import retry
|
||||||
|
|
||||||
|
from .util import safe_import_pandas
|
||||||
|
|
||||||
|
pd = safe_import_pandas()
|
||||||
|
DATA = Union[pa.Table, "pd.DataFrame"]
|
||||||
|
|
||||||
|
|
||||||
def with_embeddings(
|
def with_embeddings(
|
||||||
func: Callable,
|
func: Callable,
|
||||||
data: Union[pa.Table, pd.DataFrame],
|
data: DATA,
|
||||||
column: str = "text",
|
column: str = "text",
|
||||||
wrap_api: bool = True,
|
wrap_api: bool = True,
|
||||||
show_progress: bool = False,
|
show_progress: bool = False,
|
||||||
@@ -60,7 +64,7 @@ def with_embeddings(
|
|||||||
func = func.batch_size(batch_size)
|
func = func.batch_size(batch_size)
|
||||||
if show_progress:
|
if show_progress:
|
||||||
func = func.show_progress()
|
func = func.show_progress()
|
||||||
if isinstance(data, pd.DataFrame):
|
if pd is not None and isinstance(data, pd.DataFrame):
|
||||||
data = pa.Table.from_pandas(data, preserve_index=False)
|
data = pa.Table.from_pandas(data, preserve_index=False)
|
||||||
embeddings = func(data[column].to_numpy())
|
embeddings = func(data[column].to_numpy())
|
||||||
table = vec_to_table(np.array(embeddings))
|
table = vec_to_table(np.array(embeddings))
|
||||||
|
|||||||
@@ -249,3 +249,36 @@ def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
|
|||||||
"""
|
"""
|
||||||
fields = _pydantic_model_to_fields(model)
|
fields = _pydantic_model_to_fields(model)
|
||||||
return pa.schema(fields)
|
return pa.schema(fields)
|
||||||
|
|
||||||
|
|
||||||
|
class LanceModel(pydantic.BaseModel):
|
||||||
|
"""
|
||||||
|
A Pydantic Model base class that can be converted to a LanceDB Table.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> import lancedb
|
||||||
|
>>> from lancedb.pydantic import LanceModel, vector
|
||||||
|
>>>
|
||||||
|
>>> class TestModel(LanceModel):
|
||||||
|
... name: str
|
||||||
|
... vector: vector(2)
|
||||||
|
...
|
||||||
|
>>> db = lancedb.connect("/tmp")
|
||||||
|
>>> table = db.create_table("test", schema=TestModel.to_arrow_schema())
|
||||||
|
>>> table.add([
|
||||||
|
... TestModel(name="test", vector=[1.0, 2.0])
|
||||||
|
... ])
|
||||||
|
>>> table.search([0., 0.]).limit(1).to_pydantic(TestModel)
|
||||||
|
[TestModel(name='test', vector=FixedSizeList(dim=2))]
|
||||||
|
"""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def to_arrow_schema(cls):
|
||||||
|
return pydantic_to_schema(cls)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def field_names(cls) -> List[str]:
|
||||||
|
if PYDANTIC_VERSION.major < 2:
|
||||||
|
return list(cls.__fields__.keys())
|
||||||
|
return list(cls.model_fields.keys())
|
||||||
|
|||||||
@@ -13,17 +13,20 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import List, Literal, Optional, Union
|
from typing import List, Literal, Optional, Type, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
from pydantic import BaseModel
|
import pydantic
|
||||||
|
|
||||||
from .common import VECTOR_COLUMN_NAME
|
from .common import VECTOR_COLUMN_NAME
|
||||||
|
from .pydantic import LanceModel
|
||||||
|
from .util import safe_import_pandas
|
||||||
|
|
||||||
|
pd = safe_import_pandas()
|
||||||
|
|
||||||
|
|
||||||
class Query(BaseModel):
|
class Query(pydantic.BaseModel):
|
||||||
"""A Query"""
|
"""A Query"""
|
||||||
|
|
||||||
vector_column: str = VECTOR_COLUMN_NAME
|
vector_column: str = VECTOR_COLUMN_NAME
|
||||||
@@ -198,7 +201,7 @@ class LanceQueryBuilder:
|
|||||||
self._refine_factor = refine_factor
|
self._refine_factor = refine_factor
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def to_df(self) -> pd.DataFrame:
|
def to_df(self) -> "pd.DataFrame":
|
||||||
"""
|
"""
|
||||||
Execute the query and return the results as a pandas DataFrame.
|
Execute the query and return the results as a pandas DataFrame.
|
||||||
In addition to the selected columns, LanceDB also returns a vector
|
In addition to the selected columns, LanceDB also returns a vector
|
||||||
@@ -230,9 +233,26 @@ class LanceQueryBuilder:
|
|||||||
)
|
)
|
||||||
return self._table._execute_query(query)
|
return self._table._execute_query(query)
|
||||||
|
|
||||||
|
def to_pydantic(self, model: Type[LanceModel]) -> List[LanceModel]:
|
||||||
|
"""Return the table as a list of pydantic models.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
model: Type[LanceModel]
|
||||||
|
The pydantic model to use.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
List[LanceModel]
|
||||||
|
"""
|
||||||
|
return [
|
||||||
|
model(**{k: v for k, v in row.items() if k in model.field_names()})
|
||||||
|
for row in self.to_arrow().to_pylist()
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class LanceFtsQueryBuilder(LanceQueryBuilder):
|
class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||||
def to_arrow(self) -> pd.Table:
|
def to_arrow(self) -> pa.Table:
|
||||||
try:
|
try:
|
||||||
import tantivy
|
import tantivy
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ import pyarrow as pa
|
|||||||
|
|
||||||
from lancedb.common import DATA
|
from lancedb.common import DATA
|
||||||
from lancedb.db import DBConnection
|
from lancedb.db import DBConnection
|
||||||
from lancedb.schema import schema_to_json
|
|
||||||
from lancedb.table import Table, _sanitize_data
|
from lancedb.table import Table, _sanitize_data
|
||||||
|
|
||||||
from .arrow import to_ipc_binary
|
from .arrow import to_ipc_binary
|
||||||
|
|||||||
@@ -16,11 +16,11 @@ from functools import cached_property
|
|||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
|
from lance import json_to_schema
|
||||||
|
|
||||||
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
|
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||||
|
|
||||||
from ..query import LanceQueryBuilder, Query
|
from ..query import LanceQueryBuilder
|
||||||
from ..schema import json_to_schema
|
|
||||||
from ..table import Query, Table, _sanitize_data
|
from ..table import Query, Table, _sanitize_data
|
||||||
from .arrow import to_ipc_binary
|
from .arrow import to_ipc_binary
|
||||||
from .client import ARROW_STREAM_CONTENT_TYPE
|
from .client import ARROW_STREAM_CONTENT_TYPE
|
||||||
|
|||||||
@@ -12,11 +12,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
"""Schema related utilities."""
|
"""Schema related utilities."""
|
||||||
|
|
||||||
from typing import Any, Dict, Type
|
|
||||||
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
from lance import json_to_schema, schema_to_json
|
|
||||||
|
|
||||||
|
|
||||||
def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType:
|
def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType:
|
||||||
|
|||||||
@@ -20,26 +20,32 @@ from typing import Iterable, List, Union
|
|||||||
|
|
||||||
import lance
|
import lance
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pyarrow.compute as pc
|
import pyarrow.compute as pc
|
||||||
from lance import LanceDataset
|
from lance import LanceDataset
|
||||||
from lance.vector import vec_to_table
|
from lance.vector import vec_to_table
|
||||||
|
|
||||||
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||||
|
from .pydantic import LanceModel
|
||||||
from .query import LanceFtsQueryBuilder, LanceQueryBuilder, Query
|
from .query import LanceFtsQueryBuilder, LanceQueryBuilder, Query
|
||||||
from .util import fs_from_uri
|
from .util import fs_from_uri, safe_import_pandas
|
||||||
|
|
||||||
|
pd = safe_import_pandas()
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_data(data, schema, on_bad_vectors, fill_value):
|
def _sanitize_data(data, schema, on_bad_vectors, fill_value):
|
||||||
if isinstance(data, list):
|
if isinstance(data, list):
|
||||||
|
# convert to list of dict if data is a bunch of LanceModels
|
||||||
|
if isinstance(data[0], LanceModel):
|
||||||
|
schema = data[0].__class__.to_arrow_schema()
|
||||||
|
data = [dict(d) for d in data]
|
||||||
data = pa.Table.from_pylist(data)
|
data = pa.Table.from_pylist(data)
|
||||||
data = _sanitize_schema(
|
data = _sanitize_schema(
|
||||||
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||||
)
|
)
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
data = vec_to_table(data)
|
data = vec_to_table(data)
|
||||||
if isinstance(data, pd.DataFrame):
|
if pd is not None and isinstance(data, pd.DataFrame):
|
||||||
data = pa.Table.from_pandas(data)
|
data = pa.Table.from_pandas(data)
|
||||||
data = _sanitize_schema(
|
data = _sanitize_schema(
|
||||||
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||||
@@ -94,7 +100,7 @@ class Table(ABC):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def to_pandas(self) -> pd.DataFrame:
|
def to_pandas(self):
|
||||||
"""Return the table as a pandas DataFrame.
|
"""Return the table as a pandas DataFrame.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
@@ -328,7 +334,7 @@ class LanceTable(Table):
|
|||||||
"""Return the first n rows of the table."""
|
"""Return the first n rows of the table."""
|
||||||
return self._dataset.head(n)
|
return self._dataset.head(n)
|
||||||
|
|
||||||
def to_pandas(self) -> pd.DataFrame:
|
def to_pandas(self) -> "pd.DataFrame":
|
||||||
"""Return the table as a pandas DataFrame.
|
"""Return the table as a pandas DataFrame.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ import os
|
|||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import pyarrow as pa
|
|
||||||
import pyarrow.fs as pa_fs
|
import pyarrow.fs as pa_fs
|
||||||
|
|
||||||
|
|
||||||
@@ -76,3 +75,12 @@ def fs_from_uri(uri: str) -> Tuple[pa_fs.FileSystem, str]:
|
|||||||
return fs, path
|
return fs, path
|
||||||
|
|
||||||
return pa_fs.FileSystem.from_uri(uri)
|
return pa_fs.FileSystem.from_uri(uri)
|
||||||
|
|
||||||
|
|
||||||
|
def safe_import_pandas():
|
||||||
|
try:
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
return pd
|
||||||
|
except ImportError:
|
||||||
|
return None
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.1.15"
|
version = "0.1.16"
|
||||||
dependencies = ["pylance~=0.5.8", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"]
|
dependencies = ["pylance==0.5.10", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"]
|
||||||
description = "lancedb"
|
description = "lancedb"
|
||||||
authors = [
|
authors = [
|
||||||
{ name = "LanceDB Devs", email = "dev@lancedb.com" },
|
{ name = "LanceDB Devs", email = "dev@lancedb.com" },
|
||||||
@@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb"
|
|||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
tests = [
|
tests = [
|
||||||
"pytest", "pytest-mock", "pytest-asyncio"
|
"pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio"
|
||||||
]
|
]
|
||||||
dev = [
|
dev = [
|
||||||
"ruff", "pre-commit", "black"
|
"ruff", "pre-commit", "black"
|
||||||
|
|||||||
@@ -149,6 +149,10 @@ def test_delete_table(tmp_path):
|
|||||||
db.create_table("test", data=data)
|
db.create_table("test", data=data)
|
||||||
assert db.table_names() == ["test"]
|
assert db.table_names() == ["test"]
|
||||||
|
|
||||||
|
# dropping a table that does not exist should pass
|
||||||
|
# if ignore_missing=True
|
||||||
|
db.drop_table("does_not_exist", ignore_missing=True)
|
||||||
|
|
||||||
|
|
||||||
def test_empty_or_nonexistent_table(tmp_path):
|
def test_empty_or_nonexistent_table(tmp_path):
|
||||||
db = lancedb.connect(tmp_path)
|
db = lancedb.connect(tmp_path)
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import pyarrow as pa
|
|||||||
import pydantic
|
import pydantic
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from lancedb.pydantic import PYDANTIC_VERSION, pydantic_to_schema, vector
|
from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, pydantic_to_schema, vector
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
@@ -163,3 +163,13 @@ def test_fixed_size_list_validation():
|
|||||||
TestModel(vec=range(7))
|
TestModel(vec=range(7))
|
||||||
|
|
||||||
TestModel(vec=range(8))
|
TestModel(vec=range(8))
|
||||||
|
|
||||||
|
|
||||||
|
def test_lance_model():
|
||||||
|
class TestModel(LanceModel):
|
||||||
|
vec: vector(16)
|
||||||
|
li: List[int]
|
||||||
|
|
||||||
|
schema = pydantic_to_schema(TestModel)
|
||||||
|
assert schema == TestModel.to_arrow_schema()
|
||||||
|
assert TestModel.field_names() == ["vec", "li"]
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import pyarrow as pa
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from lancedb.db import LanceDBConnection
|
from lancedb.db import LanceDBConnection
|
||||||
|
from lancedb.pydantic import LanceModel, vector
|
||||||
from lancedb.query import LanceQueryBuilder, Query
|
from lancedb.query import LanceQueryBuilder, Query
|
||||||
from lancedb.table import LanceTable
|
from lancedb.table import LanceTable
|
||||||
|
|
||||||
@@ -64,6 +65,24 @@ def table(tmp_path) -> MockTable:
|
|||||||
return MockTable(tmp_path)
|
return MockTable(tmp_path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_cast(table):
|
||||||
|
class TestModel(LanceModel):
|
||||||
|
vector: vector(2)
|
||||||
|
id: int
|
||||||
|
str_field: str
|
||||||
|
float_field: float
|
||||||
|
|
||||||
|
q = LanceQueryBuilder(table, [0, 0], "vector").limit(1)
|
||||||
|
results = q.to_pydantic(TestModel)
|
||||||
|
assert len(results) == 1
|
||||||
|
r0 = results[0]
|
||||||
|
assert isinstance(r0, TestModel)
|
||||||
|
assert r0.id == 1
|
||||||
|
assert r0.vector == [1, 2]
|
||||||
|
assert r0.str_field == "a"
|
||||||
|
assert r0.float_field == 1.0
|
||||||
|
|
||||||
|
|
||||||
def test_query_builder(table):
|
def test_query_builder(table):
|
||||||
df = LanceQueryBuilder(table, [0, 0], "vector").limit(1).select(["id"]).to_df()
|
df = LanceQueryBuilder(table, [0, 0], "vector").limit(1).select(["id"]).to_df()
|
||||||
assert df["id"].values[0] == 1
|
assert df["id"].values[0] == 1
|
||||||
|
|||||||
@@ -13,15 +13,16 @@
|
|||||||
|
|
||||||
import functools
|
import functools
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import List
|
||||||
from unittest.mock import PropertyMock, patch
|
from unittest.mock import PropertyMock, patch
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
from lance.vector import vec_to_table
|
|
||||||
|
|
||||||
from lancedb.db import LanceDBConnection
|
from lancedb.db import LanceDBConnection
|
||||||
|
from lancedb.pydantic import LanceModel, vector
|
||||||
from lancedb.table import LanceTable
|
from lancedb.table import LanceTable
|
||||||
|
|
||||||
|
|
||||||
@@ -135,6 +136,17 @@ def test_add(db):
|
|||||||
_add(table, schema)
|
_add(table, schema)
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_pydantic_model(db):
|
||||||
|
class TestModel(LanceModel):
|
||||||
|
vector: vector(16)
|
||||||
|
li: List[int]
|
||||||
|
|
||||||
|
data = TestModel(vector=list(range(16)), li=[1, 2, 3])
|
||||||
|
table = LanceTable.create(db, "test", data=[data])
|
||||||
|
assert len(table) == 1
|
||||||
|
assert table.schema == TestModel.to_arrow_schema()
|
||||||
|
|
||||||
|
|
||||||
def _add(table, schema):
|
def _add(table, schema):
|
||||||
# table = LanceTable(db, "test")
|
# table = LanceTable(db, "test")
|
||||||
assert len(table) == 2
|
assert len(table) == 2
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "vectordb-node"
|
name = "vectordb-node"
|
||||||
version = "0.1.18"
|
version = "0.1.19"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
@@ -13,6 +13,7 @@ crate-type = ["cdylib"]
|
|||||||
arrow-array = { workspace = true }
|
arrow-array = { workspace = true }
|
||||||
arrow-ipc = { workspace = true }
|
arrow-ipc = { workspace = true }
|
||||||
arrow-schema = { workspace = true }
|
arrow-schema = { workspace = true }
|
||||||
|
conv = "0.3.3"
|
||||||
once_cell = "1"
|
once_cell = "1"
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
half = { workspace = true }
|
half = { workspace = true }
|
||||||
@@ -21,5 +22,6 @@ vectordb = { path = "../../vectordb" }
|
|||||||
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
||||||
neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] }
|
neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] }
|
||||||
object_store = { workspace = true, features = ["aws"] }
|
object_store = { workspace = true, features = ["aws"] }
|
||||||
|
snafu = { workspace = true }
|
||||||
async-trait = "0"
|
async-trait = "0"
|
||||||
env_logger = "0"
|
env_logger = "0"
|
||||||
|
|||||||
@@ -13,27 +13,30 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::io::Cursor;
|
use std::io::Cursor;
|
||||||
|
use std::ops::Deref;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_array::cast::as_list_array;
|
use arrow_array::cast::as_list_array;
|
||||||
use arrow_array::{Array, FixedSizeListArray, RecordBatch};
|
use arrow_array::{Array, ArrayRef, FixedSizeListArray, RecordBatch};
|
||||||
use arrow_ipc::reader::FileReader;
|
use arrow_ipc::reader::FileReader;
|
||||||
|
use arrow_ipc::writer::FileWriter;
|
||||||
use arrow_schema::{DataType, Field, Schema};
|
use arrow_schema::{DataType, Field, Schema};
|
||||||
use lance::arrow::{FixedSizeListArrayExt, RecordBatchExt};
|
use lance::arrow::{FixedSizeListArrayExt, RecordBatchExt};
|
||||||
|
use vectordb::table::VECTOR_COLUMN_NAME;
|
||||||
|
|
||||||
|
use crate::error::{MissingColumnSnafu, Result};
|
||||||
|
use snafu::prelude::*;
|
||||||
|
|
||||||
|
pub(crate) fn convert_record_batch(record_batch: RecordBatch) -> Result<RecordBatch> {
|
||||||
|
let column = get_column(VECTOR_COLUMN_NAME, &record_batch)?;
|
||||||
|
|
||||||
pub(crate) fn convert_record_batch(record_batch: RecordBatch) -> RecordBatch {
|
|
||||||
let column = record_batch
|
|
||||||
.column_by_name("vector")
|
|
||||||
.cloned()
|
|
||||||
.expect("vector column is missing");
|
|
||||||
// TODO: we should just consume the underlying js buffer in the future instead of this arrow around a bunch of times
|
// TODO: we should just consume the underlying js buffer in the future instead of this arrow around a bunch of times
|
||||||
let arr = as_list_array(column.as_ref());
|
let arr = as_list_array(column.as_ref());
|
||||||
let list_size = arr.values().len() / record_batch.num_rows();
|
let list_size = arr.values().len() / record_batch.num_rows();
|
||||||
let r =
|
let r = FixedSizeListArray::try_new_from_values(arr.values().to_owned(), list_size as i32)?;
|
||||||
FixedSizeListArray::try_new_from_values(arr.values().to_owned(), list_size as i32).unwrap();
|
|
||||||
|
|
||||||
let schema = Arc::new(Schema::new(vec![Field::new(
|
let schema = Arc::new(Schema::new(vec![Field::new(
|
||||||
"vector",
|
VECTOR_COLUMN_NAME,
|
||||||
DataType::FixedSizeList(
|
DataType::FixedSizeList(
|
||||||
Arc::new(Field::new("item", DataType::Float32, true)),
|
Arc::new(Field::new("item", DataType::Float32, true)),
|
||||||
list_size as i32,
|
list_size as i32,
|
||||||
@@ -41,22 +44,42 @@ pub(crate) fn convert_record_batch(record_batch: RecordBatch) -> RecordBatch {
|
|||||||
true,
|
true,
|
||||||
)]));
|
)]));
|
||||||
|
|
||||||
let mut new_batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(r)]).unwrap();
|
let mut new_batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(r)])?;
|
||||||
|
|
||||||
if record_batch.num_columns() > 1 {
|
if record_batch.num_columns() > 1 {
|
||||||
let rb = record_batch.drop_column("vector").unwrap();
|
let rb = record_batch.drop_column(VECTOR_COLUMN_NAME)?;
|
||||||
new_batch = new_batch.merge(&rb).unwrap();
|
new_batch = new_batch.merge(&rb)?;
|
||||||
}
|
}
|
||||||
new_batch
|
Ok(new_batch)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn arrow_buffer_to_record_batch(slice: &[u8]) -> Vec<RecordBatch> {
|
fn get_column(column_name: &str, record_batch: &RecordBatch) -> Result<ArrayRef> {
|
||||||
|
record_batch
|
||||||
|
.column_by_name(column_name)
|
||||||
|
.cloned()
|
||||||
|
.context(MissingColumnSnafu { name: column_name })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn arrow_buffer_to_record_batch(slice: &[u8]) -> Result<Vec<RecordBatch>> {
|
||||||
let mut batches: Vec<RecordBatch> = Vec::new();
|
let mut batches: Vec<RecordBatch> = Vec::new();
|
||||||
let fr = FileReader::try_new(Cursor::new(slice), None);
|
let file_reader = FileReader::try_new(Cursor::new(slice), None)?;
|
||||||
let file_reader = fr.unwrap();
|
|
||||||
for b in file_reader {
|
for b in file_reader {
|
||||||
let record_batch = convert_record_batch(b.unwrap());
|
let record_batch = convert_record_batch(b?)?;
|
||||||
batches.push(record_batch);
|
batches.push(record_batch);
|
||||||
}
|
}
|
||||||
batches
|
Ok(batches)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn record_batch_to_buffer(batches: Vec<RecordBatch>) -> Result<Vec<u8>> {
|
||||||
|
if batches.is_empty() {
|
||||||
|
return Ok(Vec::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let schema = batches.get(0).unwrap().schema();
|
||||||
|
let mut fr = FileWriter::try_new(Vec::new(), schema.deref())?;
|
||||||
|
for batch in batches.iter() {
|
||||||
|
fr.write(batch)?
|
||||||
|
}
|
||||||
|
fr.finish()?;
|
||||||
|
Ok(fr.into_inner()?)
|
||||||
}
|
}
|
||||||
|
|||||||
88
rust/ffi/node/src/error.rs
Normal file
88
rust/ffi/node/src/error.rs
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
// Copyright 2023 Lance Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
use arrow_schema::ArrowError;
|
||||||
|
use neon::context::Context;
|
||||||
|
use neon::prelude::NeonResult;
|
||||||
|
use snafu::Snafu;
|
||||||
|
|
||||||
|
#[derive(Debug, Snafu)]
|
||||||
|
#[snafu(visibility(pub(crate)))]
|
||||||
|
pub enum Error {
|
||||||
|
#[snafu(display("column '{name}' is missing"))]
|
||||||
|
MissingColumn { name: String },
|
||||||
|
#[snafu(display("{name}: {message}"))]
|
||||||
|
RangeError { name: String, message: String },
|
||||||
|
#[snafu(display("{index_type} is not a valid index type"))]
|
||||||
|
InvalidIndexType { index_type: String },
|
||||||
|
|
||||||
|
#[snafu(display("{message}"))]
|
||||||
|
LanceDB { message: String },
|
||||||
|
#[snafu(display("{message}"))]
|
||||||
|
Neon { message: String },
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
|
|
||||||
|
impl From<vectordb::error::Error> for Error {
|
||||||
|
fn from(e: vectordb::error::Error) -> Self {
|
||||||
|
Self::LanceDB {
|
||||||
|
message: e.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<lance::Error> for Error {
|
||||||
|
fn from(e: lance::Error) -> Self {
|
||||||
|
Self::LanceDB {
|
||||||
|
message: e.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<ArrowError> for Error {
|
||||||
|
fn from(value: ArrowError) -> Self {
|
||||||
|
Self::LanceDB {
|
||||||
|
message: value.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<neon::result::Throw> for Error {
|
||||||
|
fn from(value: neon::result::Throw) -> Self {
|
||||||
|
Self::Neon {
|
||||||
|
message: value.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// ResultExt is used to transform a [`Result`] into a [`NeonResult`],
|
||||||
|
/// so it can be returned as a JavaScript error
|
||||||
|
/// Copied from [Neon](https://github.com/neon-bindings/neon/blob/4c2e455a9e6814f1ba0178616d63caec7f4df317/crates/neon/src/result/mod.rs#L88)
|
||||||
|
pub trait ResultExt<T> {
|
||||||
|
fn or_throw<'a, C: Context<'a>>(self, cx: &mut C) -> NeonResult<T>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Implement ResultExt for the std Result so it can be used any Result type
|
||||||
|
impl<T, E> ResultExt<T> for std::result::Result<T, E>
|
||||||
|
where
|
||||||
|
E: std::fmt::Display,
|
||||||
|
{
|
||||||
|
fn or_throw<'a, C: Context<'a>>(self, cx: &mut C) -> NeonResult<T> {
|
||||||
|
match self {
|
||||||
|
Ok(value) => Ok(value),
|
||||||
|
Err(error) => cx.throw_error(error.to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -22,12 +22,15 @@ use neon::prelude::*;
|
|||||||
|
|
||||||
use vectordb::index::vector::{IvfPQIndexBuilder, VectorIndexBuilder};
|
use vectordb::index::vector::{IvfPQIndexBuilder, VectorIndexBuilder};
|
||||||
|
|
||||||
|
use crate::error::Error::InvalidIndexType;
|
||||||
|
use crate::error::ResultExt;
|
||||||
|
use crate::neon_ext::js_object_ext::JsObjectExt;
|
||||||
use crate::{runtime, JsTable};
|
use crate::{runtime, JsTable};
|
||||||
|
|
||||||
pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||||
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||||
let index_params = cx.argument::<JsObject>(0)?;
|
let index_params = cx.argument::<JsObject>(0)?;
|
||||||
let index_params_builder = get_index_params_builder(&mut cx, index_params).unwrap();
|
let index_params_builder = get_index_params_builder(&mut cx, index_params).or_throw(&mut cx)?;
|
||||||
|
|
||||||
let rt = runtime(&mut cx)?;
|
let rt = runtime(&mut cx)?;
|
||||||
let channel = cx.channel();
|
let channel = cx.channel();
|
||||||
@@ -54,27 +57,21 @@ pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsP
|
|||||||
fn get_index_params_builder(
|
fn get_index_params_builder(
|
||||||
cx: &mut FunctionContext,
|
cx: &mut FunctionContext,
|
||||||
obj: Handle<JsObject>,
|
obj: Handle<JsObject>,
|
||||||
) -> Result<impl VectorIndexBuilder, String> {
|
) -> crate::error::Result<impl VectorIndexBuilder> {
|
||||||
let idx_type = obj
|
let idx_type = obj.get::<JsString, _, _>(cx, "type")?.value(cx);
|
||||||
.get::<JsString, _, _>(cx, "type")
|
|
||||||
.map_err(|t| t.to_string())?
|
|
||||||
.value(cx);
|
|
||||||
|
|
||||||
match idx_type.as_str() {
|
match idx_type.as_str() {
|
||||||
"ivf_pq" => {
|
"ivf_pq" => {
|
||||||
let mut index_builder: IvfPQIndexBuilder = IvfPQIndexBuilder::new();
|
let mut index_builder: IvfPQIndexBuilder = IvfPQIndexBuilder::new();
|
||||||
let mut pq_params = PQBuildParams::default();
|
let mut pq_params = PQBuildParams::default();
|
||||||
|
|
||||||
obj.get_opt::<JsString, _, _>(cx, "column")
|
obj.get_opt::<JsString, _, _>(cx, "column")?
|
||||||
.map_err(|t| t.to_string())?
|
|
||||||
.map(|s| index_builder.column(s.value(cx)));
|
.map(|s| index_builder.column(s.value(cx)));
|
||||||
|
|
||||||
obj.get_opt::<JsString, _, _>(cx, "index_name")
|
obj.get_opt::<JsString, _, _>(cx, "index_name")?
|
||||||
.map_err(|t| t.to_string())?
|
|
||||||
.map(|s| index_builder.index_name(s.value(cx)));
|
.map(|s| index_builder.index_name(s.value(cx)));
|
||||||
|
|
||||||
obj.get_opt::<JsString, _, _>(cx, "metric_type")
|
obj.get_opt::<JsString, _, _>(cx, "metric_type")?
|
||||||
.map_err(|t| t.to_string())?
|
|
||||||
.map(|s| MetricType::try_from(s.value(cx).as_str()))
|
.map(|s| MetricType::try_from(s.value(cx).as_str()))
|
||||||
.map(|mt| {
|
.map(|mt| {
|
||||||
let metric_type = mt.unwrap();
|
let metric_type = mt.unwrap();
|
||||||
@@ -82,15 +79,8 @@ fn get_index_params_builder(
|
|||||||
pq_params.metric_type = metric_type;
|
pq_params.metric_type = metric_type;
|
||||||
});
|
});
|
||||||
|
|
||||||
let num_partitions = obj
|
let num_partitions = obj.get_opt_usize(cx, "num_partitions")?;
|
||||||
.get_opt::<JsNumber, _, _>(cx, "num_partitions")
|
let max_iters = obj.get_opt_usize(cx, "max_iters")?;
|
||||||
.map_err(|t| t.to_string())?
|
|
||||||
.map(|s| s.value(cx) as usize);
|
|
||||||
|
|
||||||
let max_iters = obj
|
|
||||||
.get_opt::<JsNumber, _, _>(cx, "max_iters")
|
|
||||||
.map_err(|t| t.to_string())?
|
|
||||||
.map(|s| s.value(cx) as usize);
|
|
||||||
|
|
||||||
num_partitions.map(|np| {
|
num_partitions.map(|np| {
|
||||||
let max_iters = max_iters.unwrap_or(50);
|
let max_iters = max_iters.unwrap_or(50);
|
||||||
@@ -102,32 +92,28 @@ fn get_index_params_builder(
|
|||||||
index_builder.ivf_params(ivf_params)
|
index_builder.ivf_params(ivf_params)
|
||||||
});
|
});
|
||||||
|
|
||||||
obj.get_opt::<JsBoolean, _, _>(cx, "use_opq")
|
obj.get_opt::<JsBoolean, _, _>(cx, "use_opq")?
|
||||||
.map_err(|t| t.to_string())?
|
|
||||||
.map(|s| pq_params.use_opq = s.value(cx));
|
.map(|s| pq_params.use_opq = s.value(cx));
|
||||||
|
|
||||||
obj.get_opt::<JsNumber, _, _>(cx, "num_sub_vectors")
|
obj.get_opt_usize(cx, "num_sub_vectors")?
|
||||||
.map_err(|t| t.to_string())?
|
.map(|s| pq_params.num_sub_vectors = s);
|
||||||
.map(|s| pq_params.num_sub_vectors = s.value(cx) as usize);
|
|
||||||
|
|
||||||
obj.get_opt::<JsNumber, _, _>(cx, "num_bits")
|
obj.get_opt_usize(cx, "num_bits")?
|
||||||
.map_err(|t| t.to_string())?
|
.map(|s| pq_params.num_bits = s);
|
||||||
.map(|s| pq_params.num_bits = s.value(cx) as usize);
|
|
||||||
|
|
||||||
obj.get_opt::<JsNumber, _, _>(cx, "max_iters")
|
obj.get_opt_usize(cx, "max_iters")?
|
||||||
.map_err(|t| t.to_string())?
|
.map(|s| pq_params.max_iters = s);
|
||||||
.map(|s| pq_params.max_iters = s.value(cx) as usize);
|
|
||||||
|
|
||||||
obj.get_opt::<JsNumber, _, _>(cx, "max_opq_iters")
|
obj.get_opt_usize(cx, "max_opq_iters")?
|
||||||
.map_err(|t| t.to_string())?
|
.map(|s| pq_params.max_opq_iters = s);
|
||||||
.map(|s| pq_params.max_opq_iters = s.value(cx) as usize);
|
|
||||||
|
|
||||||
obj.get_opt::<JsBoolean, _, _>(cx, "replace")
|
obj.get_opt::<JsBoolean, _, _>(cx, "replace")?
|
||||||
.map_err(|t| t.to_string())?
|
|
||||||
.map(|s| index_builder.replace(s.value(cx)));
|
.map(|s| index_builder.replace(s.value(cx)));
|
||||||
|
|
||||||
Ok(index_builder)
|
Ok(index_builder)
|
||||||
}
|
}
|
||||||
t => Err(format!("{} is not a valid index type", t).to_string()),
|
index_type => Err(InvalidIndexType {
|
||||||
|
index_type: index_type.into(),
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ use std::ops::Deref;
|
|||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
use arrow_array::{Float32Array, RecordBatchIterator};
|
use arrow_array::{Float32Array, RecordBatchIterator};
|
||||||
use arrow_ipc::writer::FileWriter;
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use futures::{TryFutureExt, TryStreamExt};
|
use futures::{TryFutureExt, TryStreamExt};
|
||||||
use lance::dataset::{WriteMode, WriteParams};
|
use lance::dataset::{WriteMode, WriteParams};
|
||||||
@@ -32,14 +31,17 @@ use once_cell::sync::OnceCell;
|
|||||||
use tokio::runtime::Runtime;
|
use tokio::runtime::Runtime;
|
||||||
|
|
||||||
use vectordb::database::Database;
|
use vectordb::database::Database;
|
||||||
use vectordb::error::Error;
|
|
||||||
use vectordb::table::{ReadParams, Table};
|
use vectordb::table::{ReadParams, Table};
|
||||||
|
|
||||||
use crate::arrow::arrow_buffer_to_record_batch;
|
use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer};
|
||||||
|
use crate::error::ResultExt;
|
||||||
|
use crate::neon_ext::js_object_ext::JsObjectExt;
|
||||||
|
|
||||||
mod arrow;
|
mod arrow;
|
||||||
mod convert;
|
mod convert;
|
||||||
|
mod error;
|
||||||
mod index;
|
mod index;
|
||||||
|
mod neon_ext;
|
||||||
|
|
||||||
struct JsDatabase {
|
struct JsDatabase {
|
||||||
database: Arc<Database>,
|
database: Arc<Database>,
|
||||||
@@ -86,7 +88,7 @@ fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {
|
|||||||
|
|
||||||
LOG.get_or_init(|| env_logger::init());
|
LOG.get_or_init(|| env_logger::init());
|
||||||
|
|
||||||
RUNTIME.get_or_try_init(|| Runtime::new().or_else(|err| cx.throw_error(err.to_string())))
|
RUNTIME.get_or_try_init(|| Runtime::new().or_throw(cx))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||||
@@ -101,7 +103,7 @@ fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
let db = JsDatabase {
|
let db = JsDatabase {
|
||||||
database: Arc::new(database.or_else(|err| cx.throw_error(err.to_string()))?),
|
database: Arc::new(database.or_throw(&mut cx)?),
|
||||||
};
|
};
|
||||||
Ok(cx.boxed(db))
|
Ok(cx.boxed(db))
|
||||||
});
|
});
|
||||||
@@ -123,7 +125,7 @@ fn database_table_names(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
let tables_rst = database.table_names().await;
|
let tables_rst = database.table_names().await;
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
let tables = tables_rst.or_else(|err| cx.throw_error(err.to_string()))?;
|
let tables = tables_rst.or_throw(&mut cx)?;
|
||||||
let table_names = convert::vec_str_to_array(&tables, &mut cx);
|
let table_names = convert::vec_str_to_array(&tables, &mut cx);
|
||||||
table_names
|
table_names
|
||||||
});
|
});
|
||||||
@@ -194,9 +196,7 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
let table_rst = database.open_table_with_params(&table_name, ¶ms).await;
|
let table_rst = database.open_table_with_params(&table_name, ¶ms).await;
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
let table = Arc::new(Mutex::new(
|
let table = Arc::new(Mutex::new(table_rst.or_throw(&mut cx)?));
|
||||||
table_rst.or_else(|err| cx.throw_error(err.to_string()))?,
|
|
||||||
));
|
|
||||||
Ok(cx.boxed(JsTable { table }))
|
Ok(cx.boxed(JsTable { table }))
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -217,7 +217,7 @@ fn database_drop_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
rt.spawn(async move {
|
rt.spawn(async move {
|
||||||
let result = database.drop_table(&table_name).await;
|
let result = database.drop_table(&table_name).await;
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
result.or_else(|err| cx.throw_error(err.to_string()))?;
|
result.or_throw(&mut cx)?;
|
||||||
Ok(cx.null())
|
Ok(cx.null())
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -246,12 +246,9 @@ fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
.get_opt::<JsString, _, _>(&mut cx, "_filter")?
|
.get_opt::<JsString, _, _>(&mut cx, "_filter")?
|
||||||
.map(|s| s.value(&mut cx));
|
.map(|s| s.value(&mut cx));
|
||||||
let refine_factor = query_obj
|
let refine_factor = query_obj
|
||||||
.get_opt::<JsNumber, _, _>(&mut cx, "_refineFactor")?
|
.get_opt_u32(&mut cx, "_refineFactor")
|
||||||
.map(|s| s.value(&mut cx))
|
.or_throw(&mut cx)?;
|
||||||
.map(|i| i as u32);
|
let nprobes = query_obj.get_usize(&mut cx, "_nprobes").or_throw(&mut cx)?;
|
||||||
let nprobes = query_obj
|
|
||||||
.get::<JsNumber, _, _>(&mut cx, "_nprobes")?
|
|
||||||
.value(&mut cx) as usize;
|
|
||||||
let metric_type = query_obj
|
let metric_type = query_obj
|
||||||
.get_opt::<JsString, _, _>(&mut cx, "_metricType")?
|
.get_opt::<JsString, _, _>(&mut cx, "_metricType")?
|
||||||
.map(|s| s.value(&mut cx))
|
.map(|s| s.value(&mut cx))
|
||||||
@@ -278,30 +275,17 @@ fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
.select(select);
|
.select(select);
|
||||||
let record_batch_stream = builder.execute();
|
let record_batch_stream = builder.execute();
|
||||||
let results = record_batch_stream
|
let results = record_batch_stream
|
||||||
.and_then(|stream| stream.try_collect::<Vec<_>>().map_err(Error::from))
|
.and_then(|stream| {
|
||||||
|
stream
|
||||||
|
.try_collect::<Vec<_>>()
|
||||||
|
.map_err(vectordb::error::Error::from)
|
||||||
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
let results = results.or_else(|err| cx.throw_error(err.to_string()))?;
|
let results = results.or_throw(&mut cx)?;
|
||||||
let vector: Vec<u8> = Vec::new();
|
let buffer = record_batch_to_buffer(results).or_throw(&mut cx)?;
|
||||||
|
Ok(JsBuffer::external(&mut cx, buffer))
|
||||||
if results.is_empty() {
|
|
||||||
return cx.buffer(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
let schema = results.get(0).unwrap().schema();
|
|
||||||
let mut fr = FileWriter::try_new(vector, schema.deref())
|
|
||||||
.or_else(|err| cx.throw_error(err.to_string()))?;
|
|
||||||
|
|
||||||
for batch in results.iter() {
|
|
||||||
fr.write(batch)
|
|
||||||
.or_else(|err| cx.throw_error(err.to_string()))?;
|
|
||||||
}
|
|
||||||
fr.finish().or_else(|err| cx.throw_error(err.to_string()))?;
|
|
||||||
let buf = fr
|
|
||||||
.into_inner()
|
|
||||||
.or_else(|err| cx.throw_error(err.to_string()))?;
|
|
||||||
Ok(JsBuffer::external(&mut cx, buf))
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
Ok(promise)
|
Ok(promise)
|
||||||
@@ -313,7 +297,7 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
.downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
|
.downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
|
||||||
let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
|
let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
|
||||||
let buffer = cx.argument::<JsBuffer>(1)?;
|
let buffer = cx.argument::<JsBuffer>(1)?;
|
||||||
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
|
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx)).or_throw(&mut cx)?;
|
||||||
let schema = batches[0].schema();
|
let schema = batches[0].schema();
|
||||||
|
|
||||||
// Write mode
|
// Write mode
|
||||||
@@ -351,9 +335,7 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
let table = Arc::new(Mutex::new(
|
let table = Arc::new(Mutex::new(table_rst.or_throw(&mut cx)?));
|
||||||
table_rst.or_else(|err| cx.throw_error(err.to_string()))?,
|
|
||||||
));
|
|
||||||
Ok(cx.boxed(JsTable { table }))
|
Ok(cx.boxed(JsTable { table }))
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -370,7 +352,8 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||||
let buffer = cx.argument::<JsBuffer>(0)?;
|
let buffer = cx.argument::<JsBuffer>(0)?;
|
||||||
let write_mode = cx.argument::<JsString>(1)?.value(&mut cx);
|
let write_mode = cx.argument::<JsString>(1)?.value(&mut cx);
|
||||||
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
|
|
||||||
|
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx)).or_throw(&mut cx)?;
|
||||||
let schema = batches[0].schema();
|
let schema = batches[0].schema();
|
||||||
|
|
||||||
let rt = runtime(&mut cx)?;
|
let rt = runtime(&mut cx)?;
|
||||||
@@ -399,7 +382,7 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
let add_result = table.lock().unwrap().add(batch_reader, Some(params)).await;
|
let add_result = table.lock().unwrap().add(batch_reader, Some(params)).await;
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
let _added = add_result.or_else(|err| cx.throw_error(err.to_string()))?;
|
let _added = add_result.or_throw(&mut cx)?;
|
||||||
Ok(cx.boolean(true))
|
Ok(cx.boolean(true))
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -418,7 +401,7 @@ fn table_count_rows(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
let num_rows_result = table.lock().unwrap().count_rows().await;
|
let num_rows_result = table.lock().unwrap().count_rows().await;
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
let num_rows = num_rows_result.or_else(|err| cx.throw_error(err.to_string()))?;
|
let num_rows = num_rows_result.or_throw(&mut cx)?;
|
||||||
Ok(cx.number(num_rows as f64))
|
Ok(cx.number(num_rows as f64))
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -438,7 +421,7 @@ fn table_delete(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
let delete_result = rt.block_on(async move { table.lock().unwrap().delete(&predicate).await });
|
let delete_result = rt.block_on(async move { table.lock().unwrap().delete(&predicate).await });
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
delete_result.or_else(|err| cx.throw_error(err.to_string()))?;
|
delete_result.or_throw(&mut cx)?;
|
||||||
Ok(cx.undefined())
|
Ok(cx.undefined())
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
15
rust/ffi/node/src/neon_ext.rs
Normal file
15
rust/ffi/node/src/neon_ext.rs
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
// Copyright 2023 Lance Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
pub mod js_object_ext;
|
||||||
82
rust/ffi/node/src/neon_ext/js_object_ext.rs
Normal file
82
rust/ffi/node/src/neon_ext/js_object_ext.rs
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
// Copyright 2023 Lance Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
use crate::error::{Error, Result};
|
||||||
|
use neon::prelude::*;
|
||||||
|
|
||||||
|
// extends neon's [JsObject] with helper functions to extract properties
|
||||||
|
pub trait JsObjectExt {
|
||||||
|
fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>>;
|
||||||
|
fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize>;
|
||||||
|
fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl JsObjectExt for JsObject {
|
||||||
|
fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>> {
|
||||||
|
let val_opt = self
|
||||||
|
.get_opt::<JsNumber, _, _>(cx, key)?
|
||||||
|
.map(|s| f64_to_u32_safe(s.value(cx), key));
|
||||||
|
val_opt.transpose()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize> {
|
||||||
|
let val = self.get::<JsNumber, _, _>(cx, key)?.value(cx);
|
||||||
|
f64_to_usize_safe(val, key)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>> {
|
||||||
|
let val_opt = self
|
||||||
|
.get_opt::<JsNumber, _, _>(cx, key)?
|
||||||
|
.map(|s| f64_to_usize_safe(s.value(cx), key));
|
||||||
|
val_opt.transpose()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn f64_to_u32_safe(n: f64, key: &str) -> Result<u32> {
|
||||||
|
use conv::*;
|
||||||
|
|
||||||
|
n.approx_as::<u32>().map_err(|e| match e {
|
||||||
|
FloatError::NegOverflow(_) => Error::RangeError {
|
||||||
|
name: key.into(),
|
||||||
|
message: "must be > 0".to_string(),
|
||||||
|
},
|
||||||
|
FloatError::PosOverflow(_) => Error::RangeError {
|
||||||
|
name: key.into(),
|
||||||
|
message: format!("must be < {}", u32::MAX),
|
||||||
|
},
|
||||||
|
FloatError::NotANumber(_) => Error::RangeError {
|
||||||
|
name: key.into(),
|
||||||
|
message: "not a valid number".to_string(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn f64_to_usize_safe(n: f64, key: &str) -> Result<usize> {
|
||||||
|
use conv::*;
|
||||||
|
|
||||||
|
n.approx_as::<usize>().map_err(|e| match e {
|
||||||
|
FloatError::NegOverflow(_) => Error::RangeError {
|
||||||
|
name: key.into(),
|
||||||
|
message: "must be > 0".to_string(),
|
||||||
|
},
|
||||||
|
FloatError::PosOverflow(_) => Error::RangeError {
|
||||||
|
name: key.into(),
|
||||||
|
message: format!("must be < {}", usize::MAX),
|
||||||
|
},
|
||||||
|
FloatError::NotANumber(_) => Error::RangeError {
|
||||||
|
name: key.into(),
|
||||||
|
message: "not a valid number".to_string(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "vectordb"
|
name = "vectordb"
|
||||||
version = "0.1.18"
|
version = "0.1.19"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
@@ -12,7 +12,7 @@ arrow-array = { workspace = true }
|
|||||||
arrow-data = { workspace = true }
|
arrow-data = { workspace = true }
|
||||||
arrow-schema = { workspace = true }
|
arrow-schema = { workspace = true }
|
||||||
object_store = { workspace = true }
|
object_store = { workspace = true }
|
||||||
snafu = "0.7.4"
|
snafu = { workspace = true }
|
||||||
half = { workspace = true }
|
half = { workspace = true }
|
||||||
lance = { workspace = true }
|
lance = { workspace = true }
|
||||||
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
||||||
|
|||||||
Reference in New Issue
Block a user