Compare commits

..

1 Commits

Author SHA1 Message Date
rmeng
60f6dc6a64 chore: switch over to requtes for remote client 2024-01-09 22:35:15 -05:00
35 changed files with 444 additions and 989 deletions

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.3
current_version = 0.4.2
commit = True
message = Bump version: {current_version} → {new_version}
tag = True

View File

@@ -49,7 +49,7 @@ jobs:
timeout-minutes: 30
strategy:
matrix:
config:
config:
- name: x86 Mac
runner: macos-13
- name: Arm Mac
@@ -74,7 +74,7 @@ jobs:
run: |
pip install -e .[tests]
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
pip install pytest pytest-mock
pip install pytest pytest-mock black
- name: Run tests
run: pytest -m "not slow" -x -v --durations=30 tests
pydantic1x:

View File

@@ -5,10 +5,10 @@ exclude = ["python"]
resolver = "2"
[workspace.dependencies]
lance = { "version" = "=0.9.6", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.9.6" }
lance-linalg = { "version" = "=0.9.6" }
lance-testing = { "version" = "=0.9.6" }
lance = { "version" = "=0.9.5", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.9.5" }
lance-linalg = { "version" = "=0.9.5" }
lance-testing = { "version" = "=0.9.5" }
# Note that this one does not include pyarrow
arrow = { version = "49.0.0", optional = false }
arrow-array = "49.0"

View File

@@ -67,7 +67,7 @@ We'll cover the basics of using LanceDB on your local machine in this section.
!!! warning
If the table already exists, LanceDB will raise an error by default.
If you want to make sure you overwrite the table, pass in `mode="overwrite"`
If you want to overwrite the table, you can pass in `mode="overwrite"`
to the `createTable` function.
=== "Javascript"

View File

@@ -118,42 +118,6 @@ texts = [{"text": "Capitalism has been dominant in the Western world since the e
tbl.add(texts)
```
## Gemini Embedding Function
With Google's Gemini, you can represent text (words, sentences, and blocks of text) in a vectorized form, making it easier to compare and contrast embeddings. For example, two texts that share a similar subject matter or sentiment should have similar embeddings, which can be identified through mathematical comparison techniques such as cosine similarity. For more on how and why you should use embeddings, refer to the Embeddings guide.
The Gemini Embedding Model API supports various task types:
| Task Type | Description |
|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|
| "`retrieval_query`" | Specifies the given text is a query in a search/retrieval setting. |
| "`retrieval_document`" | Specifies the given text is a document in a search/retrieval setting. Using this task type requires a title, but one is automatically provided by the Embeddings API |
| "`semantic_similarity`" | Specifies the given text will be used for Semantic Textual Similarity (STS). |
| "`classification`" | Specifies that the embeddings will be used for classification. |
| "`clustering`" | Specifies that the embeddings will be used for clustering. |
Usage Example:
```python
import lancedb
import pandas as pd
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import get_registry
model = get_registry().get("gemini-text").create()
class TextModel(LanceModel):
text: str = model.SourceField()
vector: Vector(model.ndims()) = model.VectorField()
df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
db = lancedb.connect("~/.lancedb")
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
tbl.add(df)
rs = tbl.search("hello").limit(1).to_pandas()
```
## Multi-modal embedding functions
Multi-modal embedding functions allow you to query your table using both images and text.

View File

@@ -31,23 +31,13 @@ This guide will show how to create tables, insert data into them, and update the
```
!!! info "Note"
If the table already exists, LanceDB will raise an error by default.
`create_table` supports an optional `exist_ok` parameter. When set to True
and the table exists, then it simply opens the existing table. The data you
passed in will NOT be appended to the table in that case.
```python
db.create_table("name", data, exist_ok=True)
```
Sometimes you want to make sure that you start fresh. If you want to
overwrite the table, you can pass in `mode="overwrite"` to the `create_table` function.
If the table already exists, LanceDB will raise an error by default. If you want to overwrite the table, you can pass in `mode="overwrite"` to the `create_table` function.
```python
db.create_table("name", data, mode="overwrite")
```
### From pandas DataFrame
```python

594
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.4.3",
"version": "0.4.2",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.4.3",
"version": "0.4.2",
"cpu": [
"x64",
"arm64"
@@ -18,9 +18,9 @@
"win32"
],
"dependencies": {
"@apache-arrow/ts": "^14.0.2",
"@apache-arrow/ts": "^12.0.0",
"@neon-rs/load": "^0.0.74",
"apache-arrow": "^14.0.2",
"apache-arrow": "^12.0.0",
"axios": "^1.4.0"
},
"devDependencies": {
@@ -53,59 +53,39 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.3",
"@lancedb/vectordb-darwin-x64": "0.4.3",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.3",
"@lancedb/vectordb-linux-x64-gnu": "0.4.3",
"@lancedb/vectordb-win32-x64-msvc": "0.4.3"
}
},
"node_modules/@75lb/deep-merge": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@75lb/deep-merge/-/deep-merge-1.1.1.tgz",
"integrity": "sha512-xvgv6pkMGBA6GwdyJbNAnDmfAIR/DfWhrj9jgWh3TY7gRm3KO46x/GPjRg6wJ0nOepwqrNxFfojebh0Df4h4Tw==",
"dependencies": {
"lodash.assignwith": "^4.2.0",
"typical": "^7.1.1"
},
"engines": {
"node": ">=12.17"
}
},
"node_modules/@75lb/deep-merge/node_modules/typical": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz",
"integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==",
"engines": {
"node": ">=12.17"
"@lancedb/vectordb-darwin-arm64": "0.4.2",
"@lancedb/vectordb-darwin-x64": "0.4.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.2",
"@lancedb/vectordb-linux-x64-gnu": "0.4.2",
"@lancedb/vectordb-win32-x64-msvc": "0.4.2"
}
},
"node_modules/@apache-arrow/ts": {
"version": "14.0.2",
"resolved": "https://registry.npmjs.org/@apache-arrow/ts/-/ts-14.0.2.tgz",
"integrity": "sha512-CtwAvLkK0CZv7xsYeCo91ml6PvlfzAmAJZkRYuz2GNBwfYufj5SVi0iuSMwIMkcU/szVwvLdzORSLa5PlF/2ug==",
"version": "12.0.0",
"resolved": "https://registry.npmjs.org/@apache-arrow/ts/-/ts-12.0.0.tgz",
"integrity": "sha512-ArJ3Fw5W9RAeNWuyCU2CdjL/nEAZSVDG1p3jz/ZtLo/q3NTz2w7HUCOJeszejH/5alGX+QirYrJ5c6BW++/P7g==",
"dependencies": {
"@types/command-line-args": "5.2.0",
"@types/command-line-usage": "5.0.2",
"@types/node": "20.3.0",
"@types/node": "18.14.5",
"@types/pad-left": "2.1.1",
"command-line-args": "5.2.1",
"command-line-usage": "7.0.1",
"flatbuffers": "23.5.26",
"command-line-usage": "6.1.3",
"flatbuffers": "23.3.3",
"json-bignum": "^0.0.3",
"pad-left": "^2.1.0",
"tslib": "^2.5.3"
"tslib": "^2.5.0"
}
},
"node_modules/@apache-arrow/ts/node_modules/@types/node": {
"version": "20.3.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz",
"integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ=="
"version": "18.14.5",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.14.5.tgz",
"integrity": "sha512-CRT4tMK/DHYhw1fcCEBwME9CSaZNclxfzVMe7GsO6ULSwsttbj70wSiX6rZdIjGblu93sTJxLdhNIT85KKI7Qw=="
},
"node_modules/@apache-arrow/ts/node_modules/tslib": {
"version": "2.6.2",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz",
"integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg=="
},
"node_modules/@cargo-messages/android-arm-eabi": {
"version": "0.0.160",
@@ -337,9 +317,9 @@
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.3.tgz",
"integrity": "sha512-47CvvSaV1EdUsFEpXUJApTk+hMzAhCxVizipCFUlXCgcmzpCDL86wNgJij/X9a+j6zADhIX//Lsu0qd/an/Bpw==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.2.tgz",
"integrity": "sha512-Ec73W2IHnZK4VC8g/7JyLbgcwcpNb9YI20yEhfTjEEFjJKoElZhDD/ZgghC3QQSRnrXFTxDzPK1V9BDT5QB2Hg==",
"cpu": [
"arm64"
],
@@ -349,9 +329,9 @@
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.3.tgz",
"integrity": "sha512-UlZZv8CmJIuRJNJG+Y1VmFsGyPR8W/72Q5EwgMMsSES6zpMQ9pNdBDWhL3UGX6nMRgnbprkwYiWJ3xHhJvtqtw==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.2.tgz",
"integrity": "sha512-tj0JJlOfOdeSAfmM7EZhrhFdCFjoq9Bmrjt4741BNjtF+Nv4Otl53lFtUQrexTr4oh/E1yY1qaydJ7K++8u3UA==",
"cpu": [
"x64"
],
@@ -361,9 +341,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.3.tgz",
"integrity": "sha512-L6NVJr/lKEd8+904FzZNpT8BGQMs2cHNYbGJMIaVvGnMiIJgKAFKtOyGtdDjoe1xRZoEw21yjRGksGbnRO5wHQ==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.2.tgz",
"integrity": "sha512-OQ7ra5Q5RrLLwxIyI338KfQ2sSl8NJfqAHWvwiMtjCYFFYxIJGjX7U0I2MjSEPqJ5/ZoyjV4mjsvs0G1q20u+Q==",
"cpu": [
"arm64"
],
@@ -373,9 +353,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.3.tgz",
"integrity": "sha512-OBx3WF3pK0xNfFJeErmuD9R2QWLa3XdeZspyTsIrQmBDeKj3HKh8y7Scpx4NH5Y09+9JNqRRKRZN7OqWTYhITg==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.2.tgz",
"integrity": "sha512-9tgIFSOYqNJzonnYsQr7v2gGdJm8aZ62UsVX2SWAIVhypoP4A05tAlbzjBgKO3R5xy5gpcW8tt/Pt8IsYWON7Q==",
"cpu": [
"x64"
],
@@ -385,9 +365,9 @@
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.3.tgz",
"integrity": "sha512-n9IvR81NXZKnSN91mrgeXbEyCiGM+YLJpOgbdHoEtMP04VDnS+iSU4jGOtQBKErvWeCJQaGFQ9qzdcVchpRGyw==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.2.tgz",
"integrity": "sha512-jhG3MqZ3r8BexXANLRNX57RAnCZT9psdSBORG3KTu5qe2xaunRlJNSA2kk8a79tf+gtUT/BAmMiXMzAi/dwq8w==",
"cpu": [
"x64"
],
@@ -886,6 +866,7 @@
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
"dev": true,
"dependencies": {
"color-convert": "^2.0.1"
},
@@ -910,34 +891,34 @@
}
},
"node_modules/apache-arrow": {
"version": "14.0.2",
"resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-14.0.2.tgz",
"integrity": "sha512-EBO2xJN36/XoY81nhLcwCJgFwkboDZeyNQ+OPsG7bCoQjc2BT0aTyH/MR6SrL+LirSNz+cYqjGRlupMMlP1aEg==",
"version": "12.0.0",
"resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-12.0.0.tgz",
"integrity": "sha512-uI+hnZZsGfNJiR/wG8j5yPQuDjmOHx4hZpkA743G4x3TlFrCpA3MMX7KUkIOIw0e/CwZ8NYuaMzaQsblA47qVA==",
"dependencies": {
"@types/command-line-args": "5.2.0",
"@types/command-line-usage": "5.0.2",
"@types/node": "20.3.0",
"@types/node": "18.14.5",
"@types/pad-left": "2.1.1",
"command-line-args": "5.2.1",
"command-line-usage": "7.0.1",
"flatbuffers": "23.5.26",
"command-line-usage": "6.1.3",
"flatbuffers": "23.3.3",
"json-bignum": "^0.0.3",
"pad-left": "^2.1.0",
"tslib": "^2.5.3"
"tslib": "^2.5.0"
},
"bin": {
"arrow2csv": "bin/arrow2csv.js"
}
},
"node_modules/apache-arrow/node_modules/@types/node": {
"version": "20.3.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz",
"integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ=="
"version": "18.14.5",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.14.5.tgz",
"integrity": "sha512-CRT4tMK/DHYhw1fcCEBwME9CSaZNclxfzVMe7GsO6ULSwsttbj70wSiX6rZdIjGblu93sTJxLdhNIT85KKI7Qw=="
},
"node_modules/apache-arrow/node_modules/tslib": {
"version": "2.6.2",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz",
"integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg=="
},
"node_modules/arg": {
"version": "4.1.3",
@@ -1189,6 +1170,7 @@
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
"integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==",
"dev": true,
"dependencies": {
"ansi-styles": "^4.1.0",
"supports-color": "^7.1.0"
@@ -1200,24 +1182,11 @@
"url": "https://github.com/chalk/chalk?sponsor=1"
}
},
"node_modules/chalk-template": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-0.4.0.tgz",
"integrity": "sha512-/ghrgmhfY8RaSdeo43hNXxpoHAtxdbskUHjPpfqUWGttFgycUhYPGx3YZBCnUCvOa7Doivn1IZec3DEGFoMgLg==",
"dependencies": {
"chalk": "^4.1.2"
},
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/chalk/chalk-template?sponsor=1"
}
},
"node_modules/chalk/node_modules/supports-color": {
"version": "7.2.0",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
"integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==",
"dev": true,
"dependencies": {
"has-flag": "^4.0.0"
},
@@ -1276,6 +1245,7 @@
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
"dev": true,
"dependencies": {
"color-name": "~1.1.4"
},
@@ -1286,7 +1256,8 @@
"node_modules/color-name": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
"dev": true
},
"node_modules/combined-stream": {
"version": "1.0.8",
@@ -1314,33 +1285,97 @@
}
},
"node_modules/command-line-usage": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-7.0.1.tgz",
"integrity": "sha512-NCyznE//MuTjwi3y84QVUGEOT+P5oto1e1Pk/jFPVdPPfsG03qpTIl3yw6etR+v73d0lXsoojRpvbru2sqePxQ==",
"version": "6.1.3",
"resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-6.1.3.tgz",
"integrity": "sha512-sH5ZSPr+7UStsloltmDh7Ce5fb8XPlHyoPzTpyyMuYCtervL65+ubVZ6Q61cFtFl62UyJlc8/JwERRbAFPUqgw==",
"dependencies": {
"array-back": "^6.2.2",
"chalk-template": "^0.4.0",
"table-layout": "^3.0.0",
"typical": "^7.1.1"
"array-back": "^4.0.2",
"chalk": "^2.4.2",
"table-layout": "^1.0.2",
"typical": "^5.2.0"
},
"engines": {
"node": ">=12.20.0"
"node": ">=8.0.0"
}
},
"node_modules/command-line-usage/node_modules/ansi-styles": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
"dependencies": {
"color-convert": "^1.9.0"
},
"engines": {
"node": ">=4"
}
},
"node_modules/command-line-usage/node_modules/array-back": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz",
"integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==",
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/array-back/-/array-back-4.0.2.tgz",
"integrity": "sha512-NbdMezxqf94cnNfWLL7V/im0Ub+Anbb0IoZhvzie8+4HJ4nMQuzHuy49FkGYCJK2yAloZ3meiB6AVMClbrI1vg==",
"engines": {
"node": ">=12.17"
"node": ">=8"
}
},
"node_modules/command-line-usage/node_modules/chalk": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
"dependencies": {
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
},
"engines": {
"node": ">=4"
}
},
"node_modules/command-line-usage/node_modules/color-convert": {
"version": "1.9.3",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
"dependencies": {
"color-name": "1.1.3"
}
},
"node_modules/command-line-usage/node_modules/color-name": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw=="
},
"node_modules/command-line-usage/node_modules/escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==",
"engines": {
"node": ">=0.8.0"
}
},
"node_modules/command-line-usage/node_modules/has-flag": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
"integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==",
"engines": {
"node": ">=4"
}
},
"node_modules/command-line-usage/node_modules/supports-color": {
"version": "5.5.0",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
"dependencies": {
"has-flag": "^3.0.0"
},
"engines": {
"node": ">=4"
}
},
"node_modules/command-line-usage/node_modules/typical": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz",
"integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==",
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz",
"integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==",
"engines": {
"node": ">=12.17"
"node": ">=8"
}
},
"node_modules/concat-map": {
@@ -1416,6 +1451,14 @@
"node": ">=6"
}
},
"node_modules/deep-extend": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
"integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/deep-is": {
"version": "0.1.4",
"resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
@@ -2194,9 +2237,9 @@
}
},
"node_modules/flatbuffers": {
"version": "23.5.26",
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-23.5.26.tgz",
"integrity": "sha512-vE+SI9vrJDwi1oETtTIFldC/o9GsVKRM+s6EL0nQgxXlYV1Vc4Tk30hj4xGICftInKQKj1F3up2n8UbIVobISQ=="
"version": "23.3.3",
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-23.3.3.tgz",
"integrity": "sha512-jmreOaAT1t55keaf+Z259Tvh8tR/Srry9K8dgCgvizhKSEr6gLGgaOJI2WFL5fkOpGOGRZwxUrlFn0GCmXUy6g=="
},
"node_modules/flatted": {
"version": "3.2.7",
@@ -2492,6 +2535,7 @@
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
"integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==",
"dev": true,
"engines": {
"node": ">=8"
}
@@ -3004,11 +3048,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/lodash.assignwith": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/lodash.assignwith/-/lodash.assignwith-4.2.0.tgz",
"integrity": "sha512-ZznplvbvtjK2gMvnQ1BR/zqPFZmS6jbK4p+6Up4xcRYA7yMIwxHCfbTcrYxXKzzqLsQ05eJPVznEW3tuwV7k1g=="
},
"node_modules/lodash.camelcase": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz",
@@ -3629,6 +3668,14 @@
"node": ">=8.10.0"
}
},
"node_modules/reduce-flatten": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/reduce-flatten/-/reduce-flatten-2.0.0.tgz",
"integrity": "sha512-EJ4UNY/U1t2P/2k6oqotuX2Cc3T6nxJwsM0N0asT7dhrtH1ltUxDn4NalSYmPE2rCkVpcf/X6R0wDwcFpzhd4w==",
"engines": {
"node": ">=6"
}
},
"node_modules/regexp.prototype.flags": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.0.tgz",
@@ -3918,14 +3965,6 @@
"source-map": "^0.6.0"
}
},
"node_modules/stream-read-all": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/stream-read-all/-/stream-read-all-3.0.1.tgz",
"integrity": "sha512-EWZT9XOceBPlVJRrYcykW8jyRSZYbkb/0ZK36uLEmoWVO5gxBOnntNTseNzfREsqxqdfEGQrD8SXQ3QWbBmq8A==",
"engines": {
"node": ">=10"
}
},
"node_modules/string-width": {
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
@@ -4043,39 +4082,33 @@
}
},
"node_modules/table-layout": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/table-layout/-/table-layout-3.0.2.tgz",
"integrity": "sha512-rpyNZYRw+/C+dYkcQ3Pr+rLxW4CfHpXjPDnG7lYhdRoUcZTUt+KEsX+94RGp/aVp/MQU35JCITv2T/beY4m+hw==",
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/table-layout/-/table-layout-1.0.2.tgz",
"integrity": "sha512-qd/R7n5rQTRFi+Zf2sk5XVVd9UQl6ZkduPFC3S7WEGJAmetDTjY3qPN50eSKzwuzEyQKy5TN2TiZdkIjos2L6A==",
"dependencies": {
"@75lb/deep-merge": "^1.1.1",
"array-back": "^6.2.2",
"command-line-args": "^5.2.1",
"command-line-usage": "^7.0.0",
"stream-read-all": "^3.0.1",
"typical": "^7.1.1",
"wordwrapjs": "^5.1.0"
},
"bin": {
"table-layout": "bin/cli.js"
"array-back": "^4.0.1",
"deep-extend": "~0.6.0",
"typical": "^5.2.0",
"wordwrapjs": "^4.0.0"
},
"engines": {
"node": ">=12.17"
"node": ">=8.0.0"
}
},
"node_modules/table-layout/node_modules/array-back": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz",
"integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==",
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/array-back/-/array-back-4.0.2.tgz",
"integrity": "sha512-NbdMezxqf94cnNfWLL7V/im0Ub+Anbb0IoZhvzie8+4HJ4nMQuzHuy49FkGYCJK2yAloZ3meiB6AVMClbrI1vg==",
"engines": {
"node": ">=12.17"
"node": ">=8"
}
},
"node_modules/table-layout/node_modules/typical": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz",
"integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==",
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz",
"integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==",
"engines": {
"node": ">=12.17"
"node": ">=8"
}
},
"node_modules/temp": {
@@ -4520,11 +4553,23 @@
"dev": true
},
"node_modules/wordwrapjs": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-5.1.0.tgz",
"integrity": "sha512-JNjcULU2e4KJwUNv6CHgI46UvDGitb6dGryHajXTDiLgg1/RiGoPSDw4kZfYnwGtEXf2ZMeIewDQgFGzkCB2Sg==",
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-4.0.1.tgz",
"integrity": "sha512-kKlNACbvHrkpIw6oPeYDSmdCTu2hdMHoyXLTcUKala++lx5Y+wjJ/e474Jqv5abnVmwxw08DiTuHmw69lJGksA==",
"dependencies": {
"reduce-flatten": "^2.0.0",
"typical": "^5.2.0"
},
"engines": {
"node": ">=12.17"
"node": ">=8.0.0"
}
},
"node_modules/wordwrapjs/node_modules/typical": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz",
"integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==",
"engines": {
"node": ">=8"
}
},
"node_modules/workerpool": {
@@ -4645,48 +4690,32 @@
}
},
"dependencies": {
"@75lb/deep-merge": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@75lb/deep-merge/-/deep-merge-1.1.1.tgz",
"integrity": "sha512-xvgv6pkMGBA6GwdyJbNAnDmfAIR/DfWhrj9jgWh3TY7gRm3KO46x/GPjRg6wJ0nOepwqrNxFfojebh0Df4h4Tw==",
"requires": {
"lodash.assignwith": "^4.2.0",
"typical": "^7.1.1"
},
"dependencies": {
"typical": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz",
"integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA=="
}
}
},
"@apache-arrow/ts": {
"version": "14.0.2",
"resolved": "https://registry.npmjs.org/@apache-arrow/ts/-/ts-14.0.2.tgz",
"integrity": "sha512-CtwAvLkK0CZv7xsYeCo91ml6PvlfzAmAJZkRYuz2GNBwfYufj5SVi0iuSMwIMkcU/szVwvLdzORSLa5PlF/2ug==",
"version": "12.0.0",
"resolved": "https://registry.npmjs.org/@apache-arrow/ts/-/ts-12.0.0.tgz",
"integrity": "sha512-ArJ3Fw5W9RAeNWuyCU2CdjL/nEAZSVDG1p3jz/ZtLo/q3NTz2w7HUCOJeszejH/5alGX+QirYrJ5c6BW++/P7g==",
"requires": {
"@types/command-line-args": "5.2.0",
"@types/command-line-usage": "5.0.2",
"@types/node": "20.3.0",
"@types/node": "18.14.5",
"@types/pad-left": "2.1.1",
"command-line-args": "5.2.1",
"command-line-usage": "7.0.1",
"flatbuffers": "23.5.26",
"command-line-usage": "6.1.3",
"flatbuffers": "23.3.3",
"json-bignum": "^0.0.3",
"pad-left": "^2.1.0",
"tslib": "^2.5.3"
"tslib": "^2.5.0"
},
"dependencies": {
"@types/node": {
"version": "20.3.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz",
"integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ=="
"version": "18.14.5",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.14.5.tgz",
"integrity": "sha512-CRT4tMK/DHYhw1fcCEBwME9CSaZNclxfzVMe7GsO6ULSwsttbj70wSiX6rZdIjGblu93sTJxLdhNIT85KKI7Qw=="
},
"tslib": {
"version": "2.6.2",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz",
"integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg=="
}
}
},
@@ -4840,33 +4869,33 @@
}
},
"@lancedb/vectordb-darwin-arm64": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.3.tgz",
"integrity": "sha512-47CvvSaV1EdUsFEpXUJApTk+hMzAhCxVizipCFUlXCgcmzpCDL86wNgJij/X9a+j6zADhIX//Lsu0qd/an/Bpw==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.2.tgz",
"integrity": "sha512-Ec73W2IHnZK4VC8g/7JyLbgcwcpNb9YI20yEhfTjEEFjJKoElZhDD/ZgghC3QQSRnrXFTxDzPK1V9BDT5QB2Hg==",
"optional": true
},
"@lancedb/vectordb-darwin-x64": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.3.tgz",
"integrity": "sha512-UlZZv8CmJIuRJNJG+Y1VmFsGyPR8W/72Q5EwgMMsSES6zpMQ9pNdBDWhL3UGX6nMRgnbprkwYiWJ3xHhJvtqtw==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.2.tgz",
"integrity": "sha512-tj0JJlOfOdeSAfmM7EZhrhFdCFjoq9Bmrjt4741BNjtF+Nv4Otl53lFtUQrexTr4oh/E1yY1qaydJ7K++8u3UA==",
"optional": true
},
"@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.3.tgz",
"integrity": "sha512-L6NVJr/lKEd8+904FzZNpT8BGQMs2cHNYbGJMIaVvGnMiIJgKAFKtOyGtdDjoe1xRZoEw21yjRGksGbnRO5wHQ==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.2.tgz",
"integrity": "sha512-OQ7ra5Q5RrLLwxIyI338KfQ2sSl8NJfqAHWvwiMtjCYFFYxIJGjX7U0I2MjSEPqJ5/ZoyjV4mjsvs0G1q20u+Q==",
"optional": true
},
"@lancedb/vectordb-linux-x64-gnu": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.3.tgz",
"integrity": "sha512-OBx3WF3pK0xNfFJeErmuD9R2QWLa3XdeZspyTsIrQmBDeKj3HKh8y7Scpx4NH5Y09+9JNqRRKRZN7OqWTYhITg==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.2.tgz",
"integrity": "sha512-9tgIFSOYqNJzonnYsQr7v2gGdJm8aZ62UsVX2SWAIVhypoP4A05tAlbzjBgKO3R5xy5gpcW8tt/Pt8IsYWON7Q==",
"optional": true
},
"@lancedb/vectordb-win32-x64-msvc": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.3.tgz",
"integrity": "sha512-n9IvR81NXZKnSN91mrgeXbEyCiGM+YLJpOgbdHoEtMP04VDnS+iSU4jGOtQBKErvWeCJQaGFQ9qzdcVchpRGyw==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.2.tgz",
"integrity": "sha512-jhG3MqZ3r8BexXANLRNX57RAnCZT9psdSBORG3KTu5qe2xaunRlJNSA2kk8a79tf+gtUT/BAmMiXMzAi/dwq8w==",
"optional": true
},
"@neon-rs/cli": {
@@ -5239,6 +5268,7 @@
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
"integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
"dev": true,
"requires": {
"color-convert": "^2.0.1"
}
@@ -5254,31 +5284,31 @@
}
},
"apache-arrow": {
"version": "14.0.2",
"resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-14.0.2.tgz",
"integrity": "sha512-EBO2xJN36/XoY81nhLcwCJgFwkboDZeyNQ+OPsG7bCoQjc2BT0aTyH/MR6SrL+LirSNz+cYqjGRlupMMlP1aEg==",
"version": "12.0.0",
"resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-12.0.0.tgz",
"integrity": "sha512-uI+hnZZsGfNJiR/wG8j5yPQuDjmOHx4hZpkA743G4x3TlFrCpA3MMX7KUkIOIw0e/CwZ8NYuaMzaQsblA47qVA==",
"requires": {
"@types/command-line-args": "5.2.0",
"@types/command-line-usage": "5.0.2",
"@types/node": "20.3.0",
"@types/node": "18.14.5",
"@types/pad-left": "2.1.1",
"command-line-args": "5.2.1",
"command-line-usage": "7.0.1",
"flatbuffers": "23.5.26",
"command-line-usage": "6.1.3",
"flatbuffers": "23.3.3",
"json-bignum": "^0.0.3",
"pad-left": "^2.1.0",
"tslib": "^2.5.3"
"tslib": "^2.5.0"
},
"dependencies": {
"@types/node": {
"version": "20.3.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz",
"integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ=="
"version": "18.14.5",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.14.5.tgz",
"integrity": "sha512-CRT4tMK/DHYhw1fcCEBwME9CSaZNclxfzVMe7GsO6ULSwsttbj70wSiX6rZdIjGblu93sTJxLdhNIT85KKI7Qw=="
},
"tslib": {
"version": "2.6.2",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz",
"integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg=="
}
}
},
@@ -5475,6 +5505,7 @@
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
"integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==",
"dev": true,
"requires": {
"ansi-styles": "^4.1.0",
"supports-color": "^7.1.0"
@@ -5484,20 +5515,13 @@
"version": "7.2.0",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
"integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==",
"dev": true,
"requires": {
"has-flag": "^4.0.0"
}
}
}
},
"chalk-template": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-0.4.0.tgz",
"integrity": "sha512-/ghrgmhfY8RaSdeo43hNXxpoHAtxdbskUHjPpfqUWGttFgycUhYPGx3YZBCnUCvOa7Doivn1IZec3DEGFoMgLg==",
"requires": {
"chalk": "^4.1.2"
}
},
"check-error": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.2.tgz",
@@ -5535,6 +5559,7 @@
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
"dev": true,
"requires": {
"color-name": "~1.1.4"
}
@@ -5542,7 +5567,8 @@
"color-name": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
"dev": true
},
"combined-stream": {
"version": "1.0.8",
@@ -5564,25 +5590,74 @@
}
},
"command-line-usage": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-7.0.1.tgz",
"integrity": "sha512-NCyznE//MuTjwi3y84QVUGEOT+P5oto1e1Pk/jFPVdPPfsG03qpTIl3yw6etR+v73d0lXsoojRpvbru2sqePxQ==",
"version": "6.1.3",
"resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-6.1.3.tgz",
"integrity": "sha512-sH5ZSPr+7UStsloltmDh7Ce5fb8XPlHyoPzTpyyMuYCtervL65+ubVZ6Q61cFtFl62UyJlc8/JwERRbAFPUqgw==",
"requires": {
"array-back": "^6.2.2",
"chalk-template": "^0.4.0",
"table-layout": "^3.0.0",
"typical": "^7.1.1"
"array-back": "^4.0.2",
"chalk": "^2.4.2",
"table-layout": "^1.0.2",
"typical": "^5.2.0"
},
"dependencies": {
"ansi-styles": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
"requires": {
"color-convert": "^1.9.0"
}
},
"array-back": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz",
"integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw=="
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/array-back/-/array-back-4.0.2.tgz",
"integrity": "sha512-NbdMezxqf94cnNfWLL7V/im0Ub+Anbb0IoZhvzie8+4HJ4nMQuzHuy49FkGYCJK2yAloZ3meiB6AVMClbrI1vg=="
},
"chalk": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
"requires": {
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
}
},
"color-convert": {
"version": "1.9.3",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
"requires": {
"color-name": "1.1.3"
}
},
"color-name": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw=="
},
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg=="
},
"has-flag": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
"integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw=="
},
"supports-color": {
"version": "5.5.0",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
"requires": {
"has-flag": "^3.0.0"
}
},
"typical": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz",
"integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA=="
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz",
"integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg=="
}
}
},
@@ -5641,6 +5716,11 @@
"type-detect": "^4.0.0"
}
},
"deep-extend": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
"integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA=="
},
"deep-is": {
"version": "0.1.4",
"resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
@@ -6217,9 +6297,9 @@
}
},
"flatbuffers": {
"version": "23.5.26",
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-23.5.26.tgz",
"integrity": "sha512-vE+SI9vrJDwi1oETtTIFldC/o9GsVKRM+s6EL0nQgxXlYV1Vc4Tk30hj4xGICftInKQKj1F3up2n8UbIVobISQ=="
"version": "23.3.3",
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-23.3.3.tgz",
"integrity": "sha512-jmreOaAT1t55keaf+Z259Tvh8tR/Srry9K8dgCgvizhKSEr6gLGgaOJI2WFL5fkOpGOGRZwxUrlFn0GCmXUy6g=="
},
"flatted": {
"version": "3.2.7",
@@ -6422,7 +6502,8 @@
"has-flag": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
"integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ=="
"integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==",
"dev": true
},
"has-property-descriptors": {
"version": "1.0.0",
@@ -6775,11 +6856,6 @@
"p-locate": "^5.0.0"
}
},
"lodash.assignwith": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/lodash.assignwith/-/lodash.assignwith-4.2.0.tgz",
"integrity": "sha512-ZznplvbvtjK2gMvnQ1BR/zqPFZmS6jbK4p+6Up4xcRYA7yMIwxHCfbTcrYxXKzzqLsQ05eJPVznEW3tuwV7k1g=="
},
"lodash.camelcase": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz",
@@ -7247,6 +7323,11 @@
"picomatch": "^2.2.1"
}
},
"reduce-flatten": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/reduce-flatten/-/reduce-flatten-2.0.0.tgz",
"integrity": "sha512-EJ4UNY/U1t2P/2k6oqotuX2Cc3T6nxJwsM0N0asT7dhrtH1ltUxDn4NalSYmPE2rCkVpcf/X6R0wDwcFpzhd4w=="
},
"regexp.prototype.flags": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.0.tgz",
@@ -7442,11 +7523,6 @@
"source-map": "^0.6.0"
}
},
"stream-read-all": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/stream-read-all/-/stream-read-all-3.0.1.tgz",
"integrity": "sha512-EWZT9XOceBPlVJRrYcykW8jyRSZYbkb/0ZK36uLEmoWVO5gxBOnntNTseNzfREsqxqdfEGQrD8SXQ3QWbBmq8A=="
},
"string-width": {
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
@@ -7528,28 +7604,25 @@
"dev": true
},
"table-layout": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/table-layout/-/table-layout-3.0.2.tgz",
"integrity": "sha512-rpyNZYRw+/C+dYkcQ3Pr+rLxW4CfHpXjPDnG7lYhdRoUcZTUt+KEsX+94RGp/aVp/MQU35JCITv2T/beY4m+hw==",
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/table-layout/-/table-layout-1.0.2.tgz",
"integrity": "sha512-qd/R7n5rQTRFi+Zf2sk5XVVd9UQl6ZkduPFC3S7WEGJAmetDTjY3qPN50eSKzwuzEyQKy5TN2TiZdkIjos2L6A==",
"requires": {
"@75lb/deep-merge": "^1.1.1",
"array-back": "^6.2.2",
"command-line-args": "^5.2.1",
"command-line-usage": "^7.0.0",
"stream-read-all": "^3.0.1",
"typical": "^7.1.1",
"wordwrapjs": "^5.1.0"
"array-back": "^4.0.1",
"deep-extend": "~0.6.0",
"typical": "^5.2.0",
"wordwrapjs": "^4.0.0"
},
"dependencies": {
"array-back": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz",
"integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw=="
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/array-back/-/array-back-4.0.2.tgz",
"integrity": "sha512-NbdMezxqf94cnNfWLL7V/im0Ub+Anbb0IoZhvzie8+4HJ4nMQuzHuy49FkGYCJK2yAloZ3meiB6AVMClbrI1vg=="
},
"typical": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz",
"integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA=="
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz",
"integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg=="
}
}
},
@@ -7867,9 +7940,20 @@
"dev": true
},
"wordwrapjs": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-5.1.0.tgz",
"integrity": "sha512-JNjcULU2e4KJwUNv6CHgI46UvDGitb6dGryHajXTDiLgg1/RiGoPSDw4kZfYnwGtEXf2ZMeIewDQgFGzkCB2Sg=="
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-4.0.1.tgz",
"integrity": "sha512-kKlNACbvHrkpIw6oPeYDSmdCTu2hdMHoyXLTcUKala++lx5Y+wjJ/e474Jqv5abnVmwxw08DiTuHmw69lJGksA==",
"requires": {
"reduce-flatten": "^2.0.0",
"typical": "^5.2.0"
},
"dependencies": {
"typical": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz",
"integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg=="
}
}
},
"workerpool": {
"version": "6.2.1",

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.4.3",
"version": "0.4.2",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -57,9 +57,9 @@
"uuid": "^9.0.0"
},
"dependencies": {
"@apache-arrow/ts": "^14.0.2",
"@apache-arrow/ts": "^12.0.0",
"@neon-rs/load": "^0.0.74",
"apache-arrow": "^14.0.2",
"apache-arrow": "^12.0.0",
"axios": "^1.4.0"
},
"os": [
@@ -81,10 +81,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.3",
"@lancedb/vectordb-darwin-x64": "0.4.3",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.3",
"@lancedb/vectordb-linux-x64-gnu": "0.4.3",
"@lancedb/vectordb-win32-x64-msvc": "0.4.3"
"@lancedb/vectordb-darwin-arm64": "0.4.2",
"@lancedb/vectordb-darwin-x64": "0.4.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.2",
"@lancedb/vectordb-linux-x64-gnu": "0.4.2",
"@lancedb/vectordb-win32-x64-msvc": "0.4.2"
}
}

View File

@@ -17,9 +17,10 @@ import {
Float32,
makeBuilder,
RecordBatchFileWriter,
Utf8, type Vector,
Utf8,
type Vector,
FixedSizeList,
vectorFromArray, type Schema, Table as ArrowTable, RecordBatchStreamWriter, List, Float64, RecordBatch, makeData, Struct
vectorFromArray, type Schema, Table as ArrowTable, RecordBatchStreamWriter, List, Float64
} from 'apache-arrow'
import { type EmbeddingFunction } from './index'
@@ -77,7 +78,6 @@ export async function convertToTable<T> (data: Array<Record<string, unknown>>, e
}
records[columnsKey] = listBuilder.finish().toVector()
} else {
// TODO if this is a struct field then recursively align the subfields
records[columnsKey] = vectorFromArray(values)
}
}
@@ -110,27 +110,21 @@ function newVectorType (dim: number): FixedSizeList<Float32> {
}
// Converts an Array of records into Arrow IPC format
export async function fromRecordsToBuffer<T> (data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<Buffer> {
let table = await convertToTable(data, embeddings)
if (schema !== undefined) {
table = alignTable(table, schema)
}
export async function fromRecordsToBuffer<T> (data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Buffer> {
const table = await convertToTable(data, embeddings)
const writer = RecordBatchFileWriter.writeAll(table)
return Buffer.from(await writer.toUint8Array())
}
// Converts an Array of records into Arrow IPC stream format
export async function fromRecordsToStreamBuffer<T> (data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<Buffer> {
let table = await convertToTable(data, embeddings)
if (schema !== undefined) {
table = alignTable(table, schema)
}
export async function fromRecordsToStreamBuffer<T> (data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Buffer> {
const table = await convertToTable(data, embeddings)
const writer = RecordBatchStreamWriter.writeAll(table)
return Buffer.from(await writer.toUint8Array())
}
// Converts an Arrow Table into Arrow IPC format
export async function fromTableToBuffer<T> (table: ArrowTable, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<Buffer> {
export async function fromTableToBuffer<T> (table: ArrowTable, embeddings?: EmbeddingFunction<T>): Promise<Buffer> {
if (embeddings !== undefined) {
const source = table.getChild(embeddings.sourceColumn)
@@ -142,15 +136,12 @@ export async function fromTableToBuffer<T> (table: ArrowTable, embeddings?: Embe
const column = vectorFromArray(vectors, newVectorType(vectors[0].length))
table = table.assign(new ArrowTable({ vector: column }))
}
if (schema !== undefined) {
table = alignTable(table, schema)
}
const writer = RecordBatchFileWriter.writeAll(table)
return Buffer.from(await writer.toUint8Array())
}
// Converts an Arrow Table into Arrow IPC stream format
export async function fromTableToStreamBuffer<T> (table: ArrowTable, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<Buffer> {
export async function fromTableToStreamBuffer<T> (table: ArrowTable, embeddings?: EmbeddingFunction<T>): Promise<Buffer> {
if (embeddings !== undefined) {
const source = table.getChild(embeddings.sourceColumn)
@@ -162,36 +153,10 @@ export async function fromTableToStreamBuffer<T> (table: ArrowTable, embeddings?
const column = vectorFromArray(vectors, newVectorType(vectors[0].length))
table = table.assign(new ArrowTable({ vector: column }))
}
if (schema !== undefined) {
table = alignTable(table, schema)
}
const writer = RecordBatchStreamWriter.writeAll(table)
return Buffer.from(await writer.toUint8Array())
}
function alignBatch (batch: RecordBatch, schema: Schema): RecordBatch {
const alignedChildren = []
for (const field of schema.fields) {
const indexInBatch = batch.schema.fields?.findIndex((f) => f.name === field.name)
if (indexInBatch < 0) {
throw new Error(`The column ${field.name} was not found in the Arrow Table`)
}
alignedChildren.push(batch.data.children[indexInBatch])
}
const newData = makeData({
type: new Struct(schema.fields),
length: batch.numRows,
nullCount: batch.nullCount,
children: alignedChildren
})
return new RecordBatch(schema, newData)
}
function alignTable (table: ArrowTable, schema: Schema): ArrowTable {
const alignedBatches = table.batches.map(batch => alignBatch(batch, schema))
return new ArrowTable(schema, alignedBatches)
}
// Creates an empty Arrow Table
export function createEmptyTable (schema: Schema): ArrowTable {
return new ArrowTable(schema)

View File

@@ -485,10 +485,10 @@ export class LocalConnection implements Connection {
}
buffer = await fromTableToBuffer(createEmptyTable(schema))
} else if (data instanceof ArrowTable) {
buffer = await fromTableToBuffer(data, embeddingFunction, schema)
buffer = await fromTableToBuffer(data, embeddingFunction)
} else {
// data is Array<Record<...>>
buffer = await fromRecordsToBuffer(data, embeddingFunction, schema)
buffer = await fromRecordsToBuffer(data, embeddingFunction)
}
const tbl = await tableCreate.call(this._db, name, buffer, writeOptions?.writeMode?.toString(), ...getAwsArgs(this._options()))
@@ -560,10 +560,9 @@ export class LocalTable<T = number[]> implements Table<T> {
* @return The number of rows added to the table
*/
async add (data: Array<Record<string, unknown>>): Promise<number> {
const schema = await this.schema
return tableAdd.call(
this._tbl,
await fromRecordsToBuffer(data, this._embeddings, schema),
await fromRecordsToBuffer(data, this._embeddings),
WriteMode.Append.toString(),
...getAwsArgs(this._options())
).then((newTable: any) => { this._tbl = newTable })

View File

@@ -176,26 +176,6 @@ describe('LanceDB client', function () {
assert.deepEqual(await con.tableNames(), ['vectors'])
})
it('create a table with a schema and records', async function () {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
const schema = new Schema(
[new Field('id', new Int32()),
new Field('name', new Utf8()),
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true)), false)
]
)
const data = [
{ vector: [0.5, 0.2], name: 'foo', id: 0 },
{ vector: [0.3, 0.1], name: 'bar', id: 1 }
]
// even though the keys in data are out of order, it should still work
const table = await con.createTable({ name: 'vectors', data, schema })
assert.equal(table.name, 'vectors')
assert.deepEqual(await con.tableNames(), ['vectors'])
})
it('create a table with a empty data array', async function () {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
@@ -314,25 +294,6 @@ describe('LanceDB client', function () {
assert.equal(await table.countRows(), 4)
})
it('appends records with fields in a different order', async function () {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
const data = [
{ id: 1, vector: [0.1, 0.2], price: 10, name: 'a' },
{ id: 2, vector: [1.1, 1.2], price: 50, name: 'b' }
]
const table = await con.createTable('vectors', data)
const dataAdd = [
{ id: 3, vector: [2.1, 2.2], name: 'c', price: 10 },
{ id: 4, vector: [3.1, 3.2], name: 'd', price: 50 }
]
await table.add(dataAdd)
assert.equal(await table.countRows(), 4)
})
it('overwrite all records in a table', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.5.0
current_version = 0.4.3
commit = True
message = [python] Bump version: {current_version} → {new_version}
tag = True

View File

@@ -45,8 +45,8 @@ pytest
To run the linter and automatically fix all errors:
```bash
ruff format python
ruff --fix python
black .
isort .
```
If any packages are missing, install them with:
@@ -82,4 +82,4 @@ pip install tantivy
To run the unit tests:
```bash
pytest
```
```

View File

@@ -56,7 +56,6 @@ class DBConnection(EnforceOverrides):
data: Optional[DATA] = None,
schema: Optional[Union[pa.Schema, LanceModel]] = None,
mode: str = "create",
exist_ok: bool = False,
on_bad_vectors: str = "error",
fill_value: float = 0.0,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
@@ -87,11 +86,6 @@ class DBConnection(EnforceOverrides):
Can be either "create" or "overwrite".
By default, if the table already exists, an exception is raised.
If you want to overwrite the table, use mode="overwrite".
exist_ok: bool, default False
If a table by the same name already exists, then raise an exception
if exist_ok=False. If exist_ok=True, then open the existing table;
it will not add the provided data but will validate against any
schema that's specified.
on_bad_vectors: str, default "error"
What to do if any of the vectors are not the same size or contains NaNs.
One of "error", "drop", "fill".
@@ -325,7 +319,6 @@ class LanceDBConnection(DBConnection):
data: Optional[DATA] = None,
schema: Optional[Union[pa.Schema, LanceModel]] = None,
mode: str = "create",
exist_ok: bool = False,
on_bad_vectors: str = "error",
fill_value: float = 0.0,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
@@ -345,7 +338,6 @@ class LanceDBConnection(DBConnection):
data,
schema,
mode=mode,
exist_ok=exist_ok,
on_bad_vectors=on_bad_vectors,
fill_value=fill_value,
embedding_functions=embedding_functions,

View File

@@ -19,5 +19,4 @@ from .open_clip import OpenClipEmbeddings
from .openai import OpenAIEmbeddings
from .registry import EmbeddingFunctionRegistry, get_registry
from .sentence_transformers import SentenceTransformerEmbeddings
from .gemini_text import GeminiText
from .utils import with_embeddings

View File

@@ -1,131 +0,0 @@
# Copyright (c) 2023. LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from functools import cached_property
from typing import List, Union, Any
import numpy as np
from .base import TextEmbeddingFunction
from .registry import register
from .utils import api_key_not_found_help, TEXT
from lancedb.pydantic import PYDANTIC_VERSION
@register("gemini-text")
class GeminiText(TextEmbeddingFunction):
"""
An embedding function that uses Google's Gemini API. Requires GOOGLE_API_KEY to be set.
https://ai.google.dev/docs/embeddings_guide
Supports various task types:
| Task Type | Description |
|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|
| "`retrieval_query`" | Specifies the given text is a query in a search/retrieval setting. |
| "`retrieval_document`" | Specifies the given text is a document in a search/retrieval setting. Using this task type requires a title but is automatically provided by Embeddings API |
| "`semantic_similarity`" | Specifies the given text will be used for Semantic Textual Similarity (STS). |
| "`classification`" | Specifies that the embeddings will be used for classification. |
| "`clustering`" | Specifies that the embeddings will be used for clustering. |
Note: The supported task types might change in the Gemini API, but as long as a supported task type and its argument set is provided,
those will be delegated to the API calls.
Parameters
----------
name: str, default "models/embedding-001"
The name of the model to use. See the Gemini documentation for a list of available models.
query_task_type: str, default "retrieval_query"
Sets the task type for the queries.
source_task_type: str, default "retrieval_document"
Sets the task type for ingestion.
Examples
--------
import lancedb
import pandas as pd
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import get_registry
model = get_registry().get("gemini-text").create()
class TextModel(LanceModel):
text: str = model.SourceField()
vector: Vector(model.ndims()) = model.VectorField()
df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
db = lancedb.connect("~/.lancedb")
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
tbl.add(df)
rs = tbl.search("hello").limit(1).to_pandas()
"""
name: str = "models/embedding-001"
query_task_type: str = "retrieval_query"
source_task_type: str = "retrieval_document"
if PYDANTIC_VERSION < (2, 0): # Pydantic 1.x compat
class Config:
keep_untouched = (cached_property,)
def ndims(self):
# TODO: fix hardcoding
return 768
def compute_query_embeddings(self, query: str, *args, **kwargs) -> List[np.array]:
return self.compute_source_embeddings(query, task_type=self.query_task_type)
def compute_source_embeddings(self, texts: TEXT, *args, **kwargs) -> List[np.array]:
texts = self.sanitize_input(texts)
task_type = (
kwargs.get("task_type") or self.source_task_type
) # assume source task type if not passed by `compute_query_embeddings`
return self.generate_embeddings(texts, task_type=task_type)
def generate_embeddings(
self, texts: Union[List[str], np.ndarray], *args, **kwargs
) -> List[np.array]:
"""
Get the embeddings for the given texts
Parameters
----------
texts: list[str] or np.ndarray (of str)
The texts to embed
"""
if (
kwargs.get("task_type") == "retrieval_document"
): # Provide a title to use existing API design
title = "Embedding of a document"
kwargs["title"] = title
return [
self.client.embed_content(model=self.name, content=text, **kwargs)[
"embedding"
]
for text in texts
]
@cached_property
def client(self):
genai = self.safe_import("google.generativeai", "google.generativeai")
if not os.environ.get("GOOGLE_API_KEY"):
api_key_not_found_help("google")
return genai

View File

@@ -10,7 +10,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from functools import cached_property
from typing import List, Union
@@ -18,7 +17,6 @@ import numpy as np
from .base import TextEmbeddingFunction
from .registry import register
from .utils import api_key_not_found_help
@register("openai")
@@ -53,7 +51,4 @@ class OpenAIEmbeddings(TextEmbeddingFunction):
@cached_property
def _openai_client(self):
openai = self.safe_import("openai")
if not os.environ.get("OPENAI_API_KEY"):
api_key_not_found_help("openai")
return openai.OpenAI()

View File

@@ -216,6 +216,7 @@ def retry_with_exponential_backoff(
exponential_base: float = 2,
jitter: bool = True,
max_retries: int = 7,
# errors: tuple = (),
):
"""Retry a function with exponential backoff.
@@ -225,6 +226,7 @@ def retry_with_exponential_backoff(
exponential_base (float): The base for exponential backoff (default is 2).
jitter (bool): Whether to add jitter to the delay (default is True).
max_retries (int): Maximum number of retries (default is 7).
errors (tuple): Tuple of specific exceptions to retry on (default is (openai.error.RateLimitError,)).
Returns:
function: The decorated function.

View File

@@ -260,41 +260,20 @@ class LanceQueryBuilder(ABC):
for row in self.to_arrow().to_pylist()
]
def to_polars(self) -> "pl.DataFrame":
"""
Execute the query and return the results as a Polars DataFrame.
In addition to the selected columns, LanceDB also returns a vector
and also the "_distance" column which is the distance between the query
vector and the returned vector.
"""
import polars as pl
return pl.from_arrow(self.to_arrow())
def limit(self, limit: Union[int, None]) -> LanceQueryBuilder:
def limit(self, limit: int) -> LanceQueryBuilder:
"""Set the maximum number of results to return.
Parameters
----------
limit: int
The maximum number of results to return.
By default the query is limited to the first 10.
Call this method and pass 0, a negative value,
or None to remove the limit.
*WARNING* if you have a large dataset, removing
the limit can potentially result in reading a
large amount of data into memory and cause
out of memory issues.
Returns
-------
LanceQueryBuilder
The LanceQueryBuilder object.
"""
if limit is None or limit <= 0:
self._limit = None
else:
self._limit = limit
self._limit = limit
return self
def select(self, columns: list) -> LanceQueryBuilder:
@@ -489,24 +468,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
def __init__(self, table: "lancedb.table.Table", query: str):
super().__init__(table)
self._query = query
self._phrase_query = False
def phrase_query(self, phrase_query: bool = True) -> LanceFtsQueryBuilder:
"""Set whether to use phrase query.
Parameters
----------
phrase_query: bool, default True
If True, then the query will be wrapped in quotes and
double quotes replaced by single quotes.
Returns
-------
LanceFtsQueryBuilder
The LanceFtsQueryBuilder object.
"""
self._phrase_query = phrase_query
return self
def to_arrow(self) -> pa.Table:
try:
@@ -529,11 +490,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
# open the index
index = tantivy.Index.open(index_path)
# get the scores and doc ids
query = self._query
if self._phrase_query:
query = query.replace('"', "'")
query = f'"{query}"'
row_ids, scores = search_index(index, query, self._limit)
row_ids, scores = search_index(index, self._query, self._limit)
if len(row_ids) == 0:
empty_schema = pa.schema([pa.field("score", pa.float32())])
return pa.Table.from_pylist([], schema=empty_schema)

View File

@@ -13,10 +13,10 @@
import functools
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
from urllib.parse import urljoin
from typing import Any, Callable, Dict, Iterable, Optional, Union
import requests
import urllib.parse
import attrs
import pyarrow as pa
from pydantic import BaseModel
@@ -39,7 +39,7 @@ def _check_not_closed(f):
def _read_ipc(resp: requests.Response) -> pa.Table:
resp_body = resp.content
resp_body = resp.raw.read()
with pa.ipc.open_file(pa.BufferReader(resp_body)) as reader:
return reader.read_all()
@@ -55,15 +55,21 @@ class RestfulLanceDBClient:
@functools.cached_property
def session(self) -> requests.Session:
return requests.Session()
session = requests.session()
session.stream = True
@property
return session
@functools.cached_property
def url(self) -> str:
return (
self.host_override
or f"https://{self.db_name}.{self.region}.api.lancedb.com"
)
def _get_request_url(self, uri: str) -> str:
return urllib.parse.urljoin(self.url, uri)
def close(self):
self.session.close()
self.closed = True
@@ -79,36 +85,22 @@ class RestfulLanceDBClient:
headers["x-lancedb-database"] = self.db_name
return headers
@staticmethod
def _check_status(resp: requests.Response):
if resp.status_code == 404:
raise LanceDBClientError(f"Not found: {resp.text}")
elif 400 <= resp.status_code < 500:
raise LanceDBClientError(
f"Bad Request: {resp.status_code}, error: {resp.text}"
)
elif 500 <= resp.status_code < 600:
raise LanceDBClientError(
f"Internal Server Error: {resp.status_code}, error: {resp.text}"
)
elif resp.status_code != 200:
raise LanceDBClientError(
f"Unknown Error: {resp.status_code}, error: {resp.text}"
)
@_check_not_closed
def get(self, uri: str, params: Union[Dict[str, Any], BaseModel] = None):
"""Send a GET request and returns the deserialized response payload."""
if isinstance(params, BaseModel):
params: Dict[str, Any] = params.dict(exclude_none=True)
with self.session.get(
urljoin(self.url, uri),
resp = self.session.get(
self._get_request_url(uri),
params=params,
headers=self.headers,
# 5s connect timeout, 30s read timeout
timeout=(5.0, 30.0),
) as resp:
self._check_status(resp)
return resp.json()
)
resp.raise_for_status()
return resp.json()
@_check_not_closed
def post(
@@ -143,18 +135,23 @@ class RestfulLanceDBClient:
headers["content-type"] = content_type
if request_id is not None:
headers["x-request-id"] = request_id
with self.session.post(
urljoin(self.url, uri),
headers=headers,
resp = self.session.post(
self._get_request_url(uri),
params=params,
headers=self.headers,
# 5s connect timeout, 30s read timeout
timeout=(5.0, 30.0),
**req_kwargs,
) as resp:
self._check_status(resp)
return deserialize(resp)
)
resp.raise_for_status()
return deserialize(resp)
@_check_not_closed
def list_tables(self, limit: int, page_token: Optional[str] = None) -> List[str]:
def list_tables(
self, limit: int, page_token: Optional[str] = None
) -> Iterable[str]:
"""List all tables in the database."""
if page_token is None:
page_token = ""

View File

@@ -73,13 +73,12 @@ class RemoteDBConnection(DBConnection):
"""
while True:
result = self._client.list_tables(limit, page_token)
if len(result) > 0:
page_token = result[len(result) - 1]
else:
break
for item in result:
yield item
if len(result) < limit:
break
else:
page_token = result[len(result) - 1]
@override
def open_table(self, name: str) -> Table:
@@ -247,7 +246,6 @@ class RemoteDBConnection(DBConnection):
request_id=request_id,
content_type=ARROW_STREAM_CONTENT_TYPE,
)
return RemoteTable(self, name)
@override
@@ -259,7 +257,6 @@ class RemoteDBConnection(DBConnection):
name: str
The name of the table.
"""
self._client.post(
f"/v1/table/{name}/drop/",
)

View File

@@ -11,7 +11,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import uuid
from functools import cached_property
from typing import Dict, Optional, Union
@@ -115,7 +114,6 @@ class RemoteTable(Table):
resp = self._conn._client.post(
f"/v1/table/{self._name}/create_index/", data=data
)
return resp
def add(
@@ -228,19 +226,17 @@ class RemoteTable(Table):
and len(query.vector) > 0
and not isinstance(query.vector[0], float)
):
results = []
result = []
for v in query.vector:
v = list(v)
q = query.copy()
q.vector = v
results.append(self._conn._client.query(self._name, q))
result.append(self._conn._client.query(self._name, q))
return pa.concat_tables(
[add_index(r.to_arrow(), i) for i, r in enumerate(results)]
[add_index(r.to_arrow(), i) for i, r in enumerate(result)]
)
else:
result = self._conn._client.query(self._name, query)
return result.to_arrow()
return self._conn._client.query(self._name, query).to_arrow()
def delete(self, predicate: str):
"""Delete rows from the table.

View File

@@ -31,13 +31,7 @@ from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
from .pydantic import LanceModel, model_to_dict
from .query import LanceQueryBuilder, Query
from .util import (
fs_from_uri,
safe_import_pandas,
safe_import_polars,
value_to_sql,
join_uri,
)
from .util import fs_from_uri, safe_import_pandas, value_to_sql, join_uri
from .utils.events import register_event
if TYPE_CHECKING:
@@ -47,7 +41,6 @@ if TYPE_CHECKING:
pd = safe_import_pandas()
pl = safe_import_polars()
def _sanitize_data(
@@ -73,8 +66,6 @@ def _sanitize_data(
meta = data.schema.metadata if data.schema.metadata is not None else {}
meta = {k: v for k, v in meta.items() if k != b"pandas"}
data = data.replace_schema_metadata(meta)
elif pl is not None and isinstance(data, pl.DataFrame):
data = data.to_arrow()
if isinstance(data, pa.Table):
if metadata:
@@ -656,19 +647,8 @@ class LanceTable(Table):
self._dataset.restore()
self._reset_dataset()
def count_rows(self, filter: Optional[str] = None) -> int:
    """Return how many rows the table holds.

    Parameters
    ----------
    filter: str, optional
        A SQL where clause; when given it is forwarded to the underlying
        dataset so only matching rows are counted.

    Returns
    -------
    int
        Whatever the underlying dataset's ``count_rows`` reports.
    """
    dataset = self._dataset
    row_total = dataset.count_rows(filter)
    return row_total
def __len__(self):
return self.count_rows()
return self._dataset.count_rows()
def __repr__(self) -> str:
return f"LanceTable({self.name})"
@@ -697,30 +677,6 @@ class LanceTable(Table):
pa.Table"""
return self._dataset.to_table()
def to_polars(self, batch_size=None) -> "pl.LazyFrame":
    """Return the table as a polars LazyFrame.

    Parameters
    ----------
    batch_size: int, optional
        Passed to polars. This is the maximum row count for
        scanned pyarrow record batches

    Note
    ----
    1. This requires polars to be installed separately
    2. Currently we've disabled push-down of the filters from polars
       because polars pushdown into pyarrow uses pyarrow compute
       expressions rather than SQL strings (which LanceDB supports)

    Returns
    -------
    pl.LazyFrame

    Raises
    ------
    ImportError
        If polars could not be imported.
    """
    # `pl` is None when the optional polars import failed; raise an
    # actionable error instead of an AttributeError on None.
    if pl is None:
        raise ImportError(
            "polars is required for to_polars(); "
            "install it with `pip install polars`"
        )
    return pl.scan_pyarrow_dataset(
        self.to_lance(), allow_pyarrow_filter=False, batch_size=batch_size
    )
@property
def _dataset_uri(self) -> str:
return join_uri(self._conn.uri, f"{self.name}.lance")
@@ -996,7 +952,6 @@ class LanceTable(Table):
data=None,
schema=None,
mode="create",
exist_ok=False,
on_bad_vectors: str = "error",
fill_value: float = 0.0,
embedding_functions: List[EmbeddingFunctionConfig] = None,
@@ -1036,10 +991,6 @@ class LanceTable(Table):
mode: str, default "create"
The mode to use when writing the data. Valid values are
"create", "overwrite", and "append".
exist_ok: bool, default False
If the table already exists then raise an error if False,
otherwise just open the table, it will not add the provided
data but will validate against any schema that's specified.
on_bad_vectors: str, default "error"
What to do if any of the vectors are not the same size or contains NaNs.
One of "error", "drop", "fill".
@@ -1090,24 +1041,14 @@ class LanceTable(Table):
schema = schema.with_metadata(metadata)
empty = pa.Table.from_pylist([], schema=schema)
try:
lance.write_dataset(empty, tbl._dataset_uri, schema=schema, mode=mode)
except OSError as err:
if "Dataset already exists" in str(err) and exist_ok:
if tbl.schema != schema:
raise ValueError(
f"Table {name} already exists with a different schema"
)
return tbl
raise
new_table = LanceTable(db, name)
lance.write_dataset(empty, tbl._dataset_uri, schema=schema, mode=mode)
table = LanceTable(db, name)
if data is not None:
new_table.add(data)
table.add(data)
register_event("create_table")
return new_table
return table
@classmethod
def open(cls, db, name):
@@ -1324,8 +1265,7 @@ def _sanitize_vector_column(
"""
# ChunkedArray is annoying to work with, so we combine chunks here
vec_arr = data[vector_column_name].combine_chunks()
typ = data[vector_column_name].type
if pa.types.is_list(typ) or pa.types.is_large_list(typ):
if pa.types.is_list(data[vector_column_name].type):
# if it's a variable size list array,
# we make sure the dimensions are all the same
has_jagged_ndims = len(vec_arr.values) % len(data) != 0

View File

@@ -123,15 +123,6 @@ def safe_import_pandas():
return None
def safe_import_polars():
    """Import polars if it is available.

    Returns the ``polars`` module, or ``None`` when importing it raises
    ``ImportError``.
    """
    try:
        import polars
    except ImportError:
        return None
    return polars
@singledispatch
def value_to_sql(value):
raise NotImplementedError("SQL conversion is not implemented for this type")

View File

@@ -1,12 +1,13 @@
[project]
name = "lancedb"
version = "0.5.0"
version = "0.4.3"
dependencies = [
"deprecation",
"pylance==0.9.6",
"pylance==0.9.5",
"ratelimiter~=1.0",
"retry>=0.9.2",
"tqdm>=4.27.0",
"requests>=2.31,<3",
"pydantic>=1.10",
"attrs>=21.3.0",
"semver>=3.0",
@@ -48,8 +49,8 @@ classifiers = [
repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies]
tests = ["aiohttp", "pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "duckdb", "pytz", "polars"]
dev = ["ruff", "pre-commit"]
tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests", "duckdb", "pytz"]
dev = ["ruff", "pre-commit", "black"]
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
clip = ["torch", "pillow", "open-clip"]
embeddings = ["openai>=1.6.1", "sentence-transformers", "torch", "pillow", "open-clip-torch", "cohere", "InstructorEmbedding"]
@@ -61,6 +62,9 @@ lancedb = "lancedb.cli.cli:cli"
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[tool.isort]
profile = "black"
[tool.ruff]
select = ["F", "E", "W", "I", "G", "TCH", "PERF"]

View File

@@ -190,48 +190,6 @@ def test_create_mode(tmp_path):
assert tbl.to_pandas().item.tolist() == ["fizz", "buzz"]
def test_create_exist_ok(tmp_path):
    """Creating an existing table raises unless ``exist_ok=True`` is passed."""
    db = lancedb.connect(tmp_path)
    frame = pd.DataFrame(
        {
            "vector": [[3.1, 4.1], [5.9, 26.5]],
            "item": ["foo", "bar"],
            "price": [10.0, 20.0],
        }
    )
    original = db.create_table("test", data=frame)

    # A second plain create on the same name must fail.
    with pytest.raises(OSError):
        db.create_table("test", data=frame)

    # open the table but don't add more rows
    reopened = db.create_table("test", data=frame, exist_ok=True)
    assert original.name == reopened.name
    assert original.schema == reopened.schema
    assert len(original) == len(reopened)

    base_fields = [
        pa.field("vector", pa.list_(pa.float32(), list_size=2)),
        pa.field("item", pa.utf8()),
        pa.field("price", pa.float64()),
    ]
    matching_schema = pa.schema(base_fields)
    by_schema = db.create_table("test", schema=matching_schema, exist_ok=True)
    assert by_schema.schema == matching_schema

    # One extra column makes the requested schema differ from the stored one.
    mismatched_schema = pa.schema(base_fields + [pa.field("extra", pa.float32())])
    with pytest.raises(ValueError):
        db.create_table("test", schema=mismatched_schema, exist_ok=True)
def test_delete_table(tmp_path):
db = lancedb.connect(tmp_path)
data = pd.DataFrame(

View File

@@ -1,27 +0,0 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pytest
from lancedb import LanceDBConnection
# TODO: setup integ test mark and script
@pytest.mark.skip(reason="Need to set up a local server")
def test_against_local_server():
    """Search a table on a locally running server and expect ten rows back."""
    connection = LanceDBConnection("lancedb+http://localhost:10024")
    remote_table = connection.open_table("sift1m_ivf1024_pq16")
    query_vector = np.random.rand(128)
    hits = remote_table.search(query_vector).to_pandas()
    assert len(hits) == 10

View File

@@ -89,7 +89,7 @@ def test_openclip(tmp_path):
db = lancedb.connect(tmp_path)
registry = get_registry()
func = registry.get("open-clip").create(max_retries=0)
func = registry.get("open-clip").create()
class Images(LanceModel):
label: str
@@ -170,7 +170,7 @@ def test_cohere_embedding_function():
@pytest.mark.slow
def test_instructor_embedding(tmp_path):
model = get_registry().get("instructor").create(max_retries=0)
model = get_registry().get("instructor").create()
class TextModel(LanceModel):
text: str = model.SourceField()
@@ -182,23 +182,3 @@ def test_instructor_embedding(tmp_path):
tbl.add(df)
assert len(tbl.to_pandas()["vector"][0]) == model.ndims()
@pytest.mark.slow
@pytest.mark.skipif(
    os.environ.get("GOOGLE_API_KEY") is None, reason="GOOGLE_API_KEY not set"
)
def test_gemini_embedding(tmp_path):
    """Embed two texts with the "gemini-text" function and search them."""
    embedder = get_registry().get("gemini-text").create(max_retries=0)

    class TextModel(LanceModel):
        text: str = embedder.SourceField()
        vector: Vector(embedder.ndims()) = embedder.VectorField()

    frame = pd.DataFrame({"text": ["hello world", "goodbye world"]})
    database = lancedb.connect(tmp_path)
    table = database.create_table("test", schema=TextModel, mode="overwrite")
    table.add(frame)

    # The stored vector length must match what the embedding function reports.
    first_vector = table.to_pandas()["vector"][0]
    assert len(first_vector) == embedder.ndims()

    top_hit = table.search("hello").limit(1).to_pandas()
    assert top_hit["text"][0] == "hello world"

View File

@@ -169,16 +169,13 @@ def test_syntax(table):
table.create_fts_index("text")
with pytest.raises(ValueError, match="Syntax Error"):
table.search("they could have been dogs OR cats").limit(10).to_list()
table.search("they could have been dogs OR cats").phrase_query().limit(10).to_list()
# this should work
table.search('"they could have been dogs OR cats"').limit(10).to_list()
# this should work too
table.search('''"the cats OR dogs were not really 'pets' at all"''').limit(
10
).to_list()
table.search('the cats OR dogs were not really "pets" at all').phrase_query().limit(
10
).to_list()
table.search('the cats OR dogs were not really "pets" at all').phrase_query().limit(
10
).to_list()
with pytest.raises(ValueError, match="Syntax Error"):
table.search('''"the cats OR dogs were not really "pets" at all"''').limit(
10
).to_list()

View File

@@ -1,95 +0,0 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import attrs
import numpy as np
import pandas as pd
import pyarrow as pa
import pytest
from aiohttp import web
from lancedb.remote.client import RestfulLanceDBClient, VectorQuery
@attrs.define
class MockLanceDBServer:
    """aiohttp-based stand-in server that answers table queries with random rows."""

    # Both fields are excluded from __init__ and are assigned in ``setup``.
    runner: web.AppRunner = attrs.field(init=False)
    site: web.TCPSite = attrs.field(init=False)

    async def query_handler(self, request: web.Request) -> web.Response:
        """Answer a POST for ``test_table`` with ten random rows.

        The rows are written with ``pa.ipc.new_file`` and returned as the
        raw response body.
        """
        assert request.match_info["table_name"] == "test_table"
        await request.json()
        # TODO: do some matching

        vector_col = pd.Series(
            [np.random.rand(128) for _ in range(10)], name="vector"
        )
        id_col = pd.Series(range(10), name="id")
        frame = pd.DataFrame([vector_col, id_col]).T

        arrow_schema = pa.schema(
            [
                pa.field("vector", pa.list_(pa.float32(), 128)),
                pa.field("id", pa.int64()),
            ]
        )
        batch = pa.RecordBatch.from_pandas(frame, schema=arrow_schema)

        sink = pa.BufferOutputStream()
        with pa.ipc.new_file(sink, batch.schema) as writer:
            writer.write_batch(batch)
        payload = sink.getvalue().to_pybytes()
        return web.Response(body=payload)

    async def setup(self):
        """Build the app, register the query route and prepare a site on localhost:8111."""
        app = web.Application()
        routes = [web.post("/table/{table_name}", self.query_handler)]
        app.add_routes(routes)
        self.runner = web.AppRunner(app)
        await self.runner.setup()
        self.site = web.TCPSite(self.runner, "localhost", 8111)

    async def start(self):
        """Start the site created by ``setup``."""
        await self.site.start()

    async def stop(self):
        """Clean up the runner created by ``setup``."""
        await self.runner.cleanup()
@pytest.mark.skip(reason="flaky somehow, fix later")
@pytest.mark.asyncio
async def test_e2e_with_mock_server():
    """Run one vector query against the mock server and check the returned columns."""
    server = MockLanceDBServer()
    await server.setup()
    await server.start()
    try:
        client = RestfulLanceDBClient("lancedb+http://localhost:8111")
        query = VectorQuery(
            vector=np.random.rand(128).tolist(),
            k=10,
            _metric="L2",
            columns=["id", "vector"],
        )
        response = await client.query("test_table", query)
        frame = response.to_pandas()
        assert "vector" in frame.columns
        assert "id" in frame.columns
    finally:
        # make sure we don't leak resources
        await server.stop()

View File

@@ -18,15 +18,15 @@ from lancedb.remote.client import VectorQuery, VectorQueryResult
class FakeLanceDBClient:
def close(self):
async def close(self):
pass
def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
assert table_name == "test"
t = pa.schema([]).empty_table()
return VectorQueryResult(t)
def post(self, path: str):
async def post(self, path: str):
pass

View File

@@ -20,7 +20,6 @@ from unittest.mock import PropertyMock, patch
import lance
import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
import pytest
from pydantic import BaseModel
@@ -183,46 +182,6 @@ def test_add_pydantic_model(db):
assert len(really_flattened.columns) == 7
def test_polars(db):
    """Round-trip data through polars: ingest, search results, and lazy scan."""
    data = {
        "vector": [[3.1, 4.1], [5.9, 26.5]],
        "item": ["foo", "bar"],
        "price": [10.0, 20.0],
    }

    # Ingest polars dataframe
    table = LanceTable.create(db, "test", data=pl.DataFrame(data))
    assert len(table) == 2

    as_pandas = table.to_pandas()
    assert np.allclose(as_pandas["vector"].tolist(), data["vector"])
    assert as_pandas["item"].tolist() == data["item"]
    assert np.allclose(as_pandas["price"].tolist(), data["price"])

    expected_schema = pa.schema(
        [
            pa.field("vector", pa.list_(pa.float32(), 2)),
            pa.field("item", pa.large_string()),
            pa.field("price", pa.float64()),
        ]
    )
    assert table.schema == expected_schema

    # search results to polars dataframe
    query_vector = [3.1, 4.1]
    nearest = table.search(query_vector).limit(1).to_polars()
    assert np.allclose(nearest["vector"][0], query_vector)
    assert nearest["item"][0] == "foo"
    assert np.allclose(nearest["price"][0], 10.0)

    # entire table to polars (lazy)
    lazy_frame = table.to_polars()
    collected = lazy_frame.collect()
    assert np.allclose(collected["vector"].to_list(), data["vector"])

    # make sure filtering isn't broken
    keep_items = pl.col("item").is_in(["foo", "bar"])
    filtered = lazy_frame.filter(keep_items).collect()
    assert len(filtered) == 2
def _add(table, schema):
# table = LanceTable(db, "test")
assert len(table) == 2
@@ -610,14 +569,6 @@ def test_empty_query(db):
val = df.id.iloc[0]
assert val == 1
table = LanceTable.create(db, "my_table2", data=[{"id": i} for i in range(100)])
df = table.search().select(["id"]).to_pandas()
assert len(df) == 10
df = table.search().select(["id"]).limit(None).to_pandas()
assert len(df) == 100
df = table.search().select(["id"]).limit(-1).to_pandas()
assert len(df) == 100
def test_compact_cleanup(db):
table = LanceTable.create(
@@ -646,14 +597,3 @@ def test_compact_cleanup(db):
with pytest.raises(Exception, match="Version 3 no longer exists"):
table.checkout(3)
def test_count_rows(db):
    """``len`` and ``count_rows`` agree, and a filter narrows the count."""
    rows = [{"text": "foo", "id": 0}, {"text": "bar", "id": 1}]
    table = LanceTable.create(db, "my_table", data=rows)

    assert len(table) == 2
    assert table.count_rows() == 2
    assert table.count_rows(filter="text='bar'") == 1

View File

@@ -1,6 +1,6 @@
[package]
name = "vectordb-node"
version = "0.4.3"
version = "0.4.2"
description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0"
edition = "2018"

View File

@@ -50,7 +50,7 @@ pub(crate) fn record_batch_to_buffer(batches: Vec<RecordBatch>) -> Result<Vec<u8
return Ok(Vec::new());
}
let schema = batches.first().unwrap().schema();
let schema = batches.get(0).unwrap().schema();
let mut fr = FileWriter::try_new(Vec::new(), schema.deref())?;
for batch in batches.iter() {
fr.write(batch)?

View File

@@ -1,6 +1,6 @@
[package]
name = "vectordb"
version = "0.4.3"
version = "0.4.2"
edition = "2021"
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license = "Apache-2.0"