mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 13:29:57 +00:00
Compare commits
14 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7b6d3f943b | ||
|
|
676876f4d5 | ||
|
|
fbfe2444a8 | ||
|
|
9555efacf9 | ||
|
|
513926960d | ||
|
|
cc507ca766 | ||
|
|
492d0328fe | ||
|
|
374c1e7aba | ||
|
|
30047a5566 | ||
|
|
85ccf9e22b | ||
|
|
0255221086 | ||
|
|
4ee229490c | ||
|
|
93e24f23af | ||
|
|
8f141e1e33 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.7.1"
|
current_version = "0.7.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
30
Cargo.toml
30
Cargo.toml
@@ -20,29 +20,29 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
|
|||||||
categories = ["database-implementations"]
|
categories = ["database-implementations"]
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.14.1", "features" = ["dynamodb"] }
|
lance = { "version" = "=0.15.0", "features" = ["dynamodb"] }
|
||||||
lance-index = { "version" = "=0.14.1" }
|
lance-index = { "version" = "=0.15.0" }
|
||||||
lance-linalg = { "version" = "=0.14.1" }
|
lance-linalg = { "version" = "=0.15.0" }
|
||||||
lance-testing = { "version" = "=0.14.1" }
|
lance-testing = { "version" = "=0.15.0" }
|
||||||
lance-datafusion = { "version" = "=0.14.1" }
|
lance-datafusion = { "version" = "=0.15.0" }
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "51.0", optional = false }
|
arrow = { version = "52.1", optional = false }
|
||||||
arrow-array = "51.0"
|
arrow-array = "52.1"
|
||||||
arrow-data = "51.0"
|
arrow-data = "52.1"
|
||||||
arrow-ipc = "51.0"
|
arrow-ipc = "52.1"
|
||||||
arrow-ord = "51.0"
|
arrow-ord = "52.1"
|
||||||
arrow-schema = "51.0"
|
arrow-schema = "52.1"
|
||||||
arrow-arith = "51.0"
|
arrow-arith = "52.1"
|
||||||
arrow-cast = "51.0"
|
arrow-cast = "52.1"
|
||||||
async-trait = "0"
|
async-trait = "0"
|
||||||
chrono = "0.4.35"
|
chrono = "0.4.35"
|
||||||
datafusion-physical-plan = "37.1"
|
datafusion-physical-plan = "40.0"
|
||||||
half = { "version" = "=2.4.1", default-features = false, features = [
|
half = { "version" = "=2.4.1", default-features = false, features = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
] }
|
] }
|
||||||
futures = "0"
|
futures = "0"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
object_store = "0.9.0"
|
object_store = "0.10.1"
|
||||||
pin-project = "1.0.7"
|
pin-project = "1.0.7"
|
||||||
snafu = "0.7.4"
|
snafu = "0.7.4"
|
||||||
url = "2"
|
url = "2"
|
||||||
|
|||||||
28
README.md
28
README.md
@@ -7,8 +7,8 @@
|
|||||||
|
|
||||||
<a href='https://github.com/lancedb/vectordb-recipes/tree/main' target="_blank"><img alt='LanceDB' src='https://img.shields.io/badge/VectorDB_Recipes-100000?style=for-the-badge&logo=LanceDB&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
|
<a href='https://github.com/lancedb/vectordb-recipes/tree/main' target="_blank"><img alt='LanceDB' src='https://img.shields.io/badge/VectorDB_Recipes-100000?style=for-the-badge&logo=LanceDB&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
|
||||||
<a href='https://lancedb.github.io/lancedb/' target="_blank"><img alt='lancdb' src='https://img.shields.io/badge/DOCS-100000?style=for-the-badge&logo=lancdb&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
|
<a href='https://lancedb.github.io/lancedb/' target="_blank"><img alt='lancdb' src='https://img.shields.io/badge/DOCS-100000?style=for-the-badge&logo=lancdb&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
|
||||||
[](https://blog.lancedb.com/)
|
[](https://blog.lancedb.com/)
|
||||||
[](https://discord.gg/zMM32dvNtd)
|
[](https://discord.gg/zMM32dvNtd)
|
||||||
[](https://twitter.com/lancedb)
|
[](https://twitter.com/lancedb)
|
||||||
|
|
||||||
</p>
|
</p>
|
||||||
@@ -44,26 +44,24 @@ LanceDB's core is written in Rust 🦀 and is built using <a href="https://githu
|
|||||||
|
|
||||||
**Javascript**
|
**Javascript**
|
||||||
```shell
|
```shell
|
||||||
npm install vectordb
|
npm install @lancedb/lancedb
|
||||||
```
|
```
|
||||||
|
|
||||||
```javascript
|
```javascript
|
||||||
const lancedb = require('vectordb');
|
import * as lancedb from "@lancedb/lancedb";
|
||||||
const db = await lancedb.connect('data/sample-lancedb');
|
|
||||||
|
|
||||||
const table = await db.createTable({
|
const db = await lancedb.connect("data/sample-lancedb");
|
||||||
name: 'vectors',
|
const table = await db.createTable("vectors", [
|
||||||
data: [
|
{ id: 1, vector: [0.1, 0.2], item: "foo", price: 10 },
|
||||||
{ id: 1, vector: [0.1, 0.2], item: "foo", price: 10 },
|
{ id: 2, vector: [1.1, 1.2], item: "bar", price: 50 },
|
||||||
{ id: 2, vector: [1.1, 1.2], item: "bar", price: 50 }
|
], {mode: 'overwrite'});
|
||||||
]
|
|
||||||
})
|
|
||||||
|
|
||||||
const query = table.search([0.1, 0.3]).limit(2);
|
|
||||||
const results = await query.execute();
|
const query = table.vectorSearch([0.1, 0.3]).limit(2);
|
||||||
|
const results = await query.toArray();
|
||||||
|
|
||||||
// You can also search for rows by specific criteria without involving a vector search.
|
// You can also search for rows by specific criteria without involving a vector search.
|
||||||
const rowsByCriteria = await table.search(undefined).where("price >= 10").execute();
|
const rowsByCriteria = await table.query().where("price >= 10").toArray();
|
||||||
```
|
```
|
||||||
|
|
||||||
**Python**
|
**Python**
|
||||||
|
|||||||
@@ -100,6 +100,7 @@ nav:
|
|||||||
- Quickstart: reranking/index.md
|
- Quickstart: reranking/index.md
|
||||||
- Cohere Reranker: reranking/cohere.md
|
- Cohere Reranker: reranking/cohere.md
|
||||||
- Linear Combination Reranker: reranking/linear_combination.md
|
- Linear Combination Reranker: reranking/linear_combination.md
|
||||||
|
- Reciprocal Rank Fusion Reranker: reranking/rrf.md
|
||||||
- Cross Encoder Reranker: reranking/cross_encoder.md
|
- Cross Encoder Reranker: reranking/cross_encoder.md
|
||||||
- ColBERT Reranker: reranking/colbert.md
|
- ColBERT Reranker: reranking/colbert.md
|
||||||
- Jina Reranker: reranking/jina.md
|
- Jina Reranker: reranking/jina.md
|
||||||
@@ -185,6 +186,7 @@ nav:
|
|||||||
- Quickstart: reranking/index.md
|
- Quickstart: reranking/index.md
|
||||||
- Cohere Reranker: reranking/cohere.md
|
- Cohere Reranker: reranking/cohere.md
|
||||||
- Linear Combination Reranker: reranking/linear_combination.md
|
- Linear Combination Reranker: reranking/linear_combination.md
|
||||||
|
- Reciprocal Rank Fusion Reranker: reranking/rrf.md
|
||||||
- Cross Encoder Reranker: reranking/cross_encoder.md
|
- Cross Encoder Reranker: reranking/cross_encoder.md
|
||||||
- ColBERT Reranker: reranking/colbert.md
|
- ColBERT Reranker: reranking/colbert.md
|
||||||
- Jina Reranker: reranking/jina.md
|
- Jina Reranker: reranking/jina.md
|
||||||
|
|||||||
@@ -390,6 +390,7 @@ Supported parameters (to be passed in `create` method) are:
|
|||||||
| `query_input_type` | `str` | `"search_query"` | The type of input data to be used for the query. |
|
| `query_input_type` | `str` | `"search_query"` | The type of input data to be used for the query. |
|
||||||
|
|
||||||
Cohere supports following input types:
|
Cohere supports following input types:
|
||||||
|
|
||||||
| Input Type | Description |
|
| Input Type | Description |
|
||||||
|-------------------------|---------------------------------------|
|
|-------------------------|---------------------------------------|
|
||||||
| "`search_document`" | Used for embeddings stored in a vector|
|
| "`search_document`" | Used for embeddings stored in a vector|
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
53
docs/src/reranking/rrf.md
Normal file
53
docs/src/reranking/rrf.md
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
# Reciprocal Rank Fusion Reranker
|
||||||
|
|
||||||
|
Reciprocal Rank Fusion (RRF) is an algorithm that computes a combined relevance score for each document from its position (rank) in each set of search results, rather than from the raw search scores. The implementation follows this [paper](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf).
|
||||||
|
|
||||||
|
|
||||||
|
!!! note
|
||||||
|
Supported Query Types: Hybrid
|
||||||
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
import numpy
|
||||||
|
import lancedb
|
||||||
|
from lancedb.embeddings import get_registry
|
||||||
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
|
from lancedb.rerankers import RRFReranker
|
||||||
|
|
||||||
|
embedder = get_registry().get("sentence-transformers").create()
|
||||||
|
db = lancedb.connect("~/.lancedb")
|
||||||
|
|
||||||
|
class Schema(LanceModel):
|
||||||
|
text: str = embedder.SourceField()
|
||||||
|
vector: Vector(embedder.ndims()) = embedder.VectorField()
|
||||||
|
|
||||||
|
data = [
|
||||||
|
{"text": "hello world"},
|
||||||
|
{"text": "goodbye world"}
|
||||||
|
]
|
||||||
|
tbl = db.create_table("test", schema=Schema, mode="overwrite")
|
||||||
|
tbl.add(data)
|
||||||
|
reranker = RRFReranker()
|
||||||
|
|
||||||
|
# Run hybrid search with a reranker
|
||||||
|
tbl.create_fts_index("text", replace=True)
|
||||||
|
result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list()
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Accepted Arguments
|
||||||
|
----------------
|
||||||
|
| Argument | Type | Default | Description |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| `K` | `int` | `60` | A constant used in the RRF formula (default is 60). Experiments indicate that `K = 60` is near-optimal, but that the choice is not critical. |
|
||||||
|
| `return_score` | `str` | `"relevance"` | The type of score to return. Options are `"relevance"` or `"all"`. If `"relevance"`, only the `_relevance_score` is returned. If `"all"`, all scores from the vector and FTS search are returned along with the relevance score. |
|
||||||
|
|
||||||
|
|
||||||
|
## Supported Scores for each query type
|
||||||
|
You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type:
|
||||||
|
|
||||||
|
### Hybrid Search
|
||||||
|
|`return_score`| Status | Description |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `relevance` | ✅ Supported | Returned rows only have the `_relevance_score` column |
|
||||||
|
| `all` | ✅ Supported | Returned rows have the vector (`_distance`) and FTS (`score`) scores along with the hybrid search score (`_relevance_score`) |
|
||||||
4
node/package-lock.json
generated
4
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.7.1",
|
"version": "0.7.2",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.7.1",
|
"version": "0.7.2",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.7.1",
|
"version": "0.7.2",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
|
|||||||
@@ -13,3 +13,13 @@ __test__
|
|||||||
renovate.json
|
renovate.json
|
||||||
.idea
|
.idea
|
||||||
src
|
src
|
||||||
|
lancedb
|
||||||
|
examples
|
||||||
|
nodejs-artifacts
|
||||||
|
Cargo.toml
|
||||||
|
biome.json
|
||||||
|
build.rs
|
||||||
|
jest.config.js
|
||||||
|
native.d.ts
|
||||||
|
tsconfig.json
|
||||||
|
typedoc.json
|
||||||
759
nodejs/examples/package-lock.json
generated
759
nodejs/examples/package-lock.json
generated
@@ -9,7 +9,8 @@
|
|||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@lancedb/lancedb": "file:../"
|
"@lancedb/lancedb": "file:../",
|
||||||
|
"@xenova/transformers": "^2.17.2"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"typescript": "^5.0.0"
|
"typescript": "^5.0.0"
|
||||||
@@ -17,7 +18,7 @@
|
|||||||
},
|
},
|
||||||
"..": {
|
"..": {
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.6.0",
|
"version": "0.7.1",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -29,17 +30,16 @@
|
|||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"apache-arrow": "^15.0.0",
|
|
||||||
"axios": "^1.7.2",
|
"axios": "^1.7.2",
|
||||||
"openai": "^4.29.2",
|
|
||||||
"reflect-metadata": "^0.2.2"
|
"reflect-metadata": "^0.2.2"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@aws-sdk/client-dynamodb": "^3.33.0",
|
||||||
"@aws-sdk/client-kms": "^3.33.0",
|
"@aws-sdk/client-kms": "^3.33.0",
|
||||||
"@aws-sdk/client-s3": "^3.33.0",
|
"@aws-sdk/client-s3": "^3.33.0",
|
||||||
"@biomejs/biome": "^1.7.3",
|
"@biomejs/biome": "^1.7.3",
|
||||||
"@jest/globals": "^29.7.0",
|
"@jest/globals": "^29.7.0",
|
||||||
"@napi-rs/cli": "^2.18.0",
|
"@napi-rs/cli": "^2.18.3",
|
||||||
"@types/axios": "^0.14.0",
|
"@types/axios": "^0.14.0",
|
||||||
"@types/jest": "^29.1.2",
|
"@types/jest": "^29.1.2",
|
||||||
"@types/tmp": "^0.2.6",
|
"@types/tmp": "^0.2.6",
|
||||||
@@ -56,12 +56,746 @@
|
|||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">= 18"
|
"node": ">= 18"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"@xenova/transformers": "^2.17.2",
|
||||||
|
"openai": "^4.29.2"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"apache-arrow": "^15.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@huggingface/jinja": {
|
||||||
|
"version": "0.2.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.2.2.tgz",
|
||||||
|
"integrity": "sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/lancedb": {
|
"node_modules/@lancedb/lancedb": {
|
||||||
"resolved": "..",
|
"resolved": "..",
|
||||||
"link": true
|
"link": true
|
||||||
},
|
},
|
||||||
|
"node_modules/@protobufjs/aspromise": {
|
||||||
|
"version": "1.1.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
||||||
|
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="
|
||||||
|
},
|
||||||
|
"node_modules/@protobufjs/base64": {
|
||||||
|
"version": "1.1.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
|
||||||
|
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="
|
||||||
|
},
|
||||||
|
"node_modules/@protobufjs/codegen": {
|
||||||
|
"version": "2.0.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
|
||||||
|
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="
|
||||||
|
},
|
||||||
|
"node_modules/@protobufjs/eventemitter": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="
|
||||||
|
},
|
||||||
|
"node_modules/@protobufjs/fetch": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"@protobufjs/aspromise": "^1.1.1",
|
||||||
|
"@protobufjs/inquire": "^1.1.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@protobufjs/float": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="
|
||||||
|
},
|
||||||
|
"node_modules/@protobufjs/inquire": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="
|
||||||
|
},
|
||||||
|
"node_modules/@protobufjs/path": {
|
||||||
|
"version": "1.1.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
|
||||||
|
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="
|
||||||
|
},
|
||||||
|
"node_modules/@protobufjs/pool": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="
|
||||||
|
},
|
||||||
|
"node_modules/@protobufjs/utf8": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="
|
||||||
|
},
|
||||||
|
"node_modules/@types/long": {
|
||||||
|
"version": "4.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
|
||||||
|
"integrity": "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA=="
|
||||||
|
},
|
||||||
|
"node_modules/@types/node": {
|
||||||
|
"version": "20.14.11",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.11.tgz",
|
||||||
|
"integrity": "sha512-kprQpL8MMeszbz6ojB5/tU8PLN4kesnN8Gjzw349rDlNgsSzg90lAVj3llK99Dh7JON+t9AuscPPFW6mPbTnSA==",
|
||||||
|
"dependencies": {
|
||||||
|
"undici-types": "~5.26.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@xenova/transformers": {
|
||||||
|
"version": "2.17.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@xenova/transformers/-/transformers-2.17.2.tgz",
|
||||||
|
"integrity": "sha512-lZmHqzrVIkSvZdKZEx7IYY51TK0WDrC8eR0c5IMnBsO8di8are1zzw8BlLhyO2TklZKLN5UffNGs1IJwT6oOqQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"@huggingface/jinja": "^0.2.2",
|
||||||
|
"onnxruntime-web": "1.14.0",
|
||||||
|
"sharp": "^0.32.0"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"onnxruntime-node": "1.14.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/b4a": {
|
||||||
|
"version": "1.6.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz",
|
||||||
|
"integrity": "sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg=="
|
||||||
|
},
|
||||||
|
"node_modules/bare-events": {
|
||||||
|
"version": "2.4.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.4.2.tgz",
|
||||||
|
"integrity": "sha512-qMKFd2qG/36aA4GwvKq8MxnPgCQAmBWmSyLWsJcbn8v03wvIPQ/hG1Ms8bPzndZxMDoHpxez5VOS+gC9Yi24/Q==",
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"node_modules/bare-fs": {
|
||||||
|
"version": "2.3.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-2.3.1.tgz",
|
||||||
|
"integrity": "sha512-W/Hfxc/6VehXlsgFtbB5B4xFcsCl+pAh30cYhoFyXErf6oGrwjh8SwiPAdHgpmWonKuYpZgGywN0SXt7dgsADA==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"bare-events": "^2.0.0",
|
||||||
|
"bare-path": "^2.0.0",
|
||||||
|
"bare-stream": "^2.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/bare-os": {
|
||||||
|
"version": "2.4.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-2.4.0.tgz",
|
||||||
|
"integrity": "sha512-v8DTT08AS/G0F9xrhyLtepoo9EJBJ85FRSMbu1pQUlAf6A8T0tEEQGMVObWeqpjhSPXsE0VGlluFBJu2fdoTNg==",
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"node_modules/bare-path": {
|
||||||
|
"version": "2.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-2.1.3.tgz",
|
||||||
|
"integrity": "sha512-lh/eITfU8hrj9Ru5quUp0Io1kJWIk1bTjzo7JH1P5dWmQ2EL4hFUlfI8FonAhSlgIfhn63p84CDY/x+PisgcXA==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"bare-os": "^2.1.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/bare-stream": {
|
||||||
|
"version": "2.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.1.3.tgz",
|
||||||
|
"integrity": "sha512-tiDAH9H/kP+tvNO5sczyn9ZAA7utrSMobyDchsnyyXBuUe2FSQWbxhtuHB8jwpHYYevVo2UJpcmvvjrbHboUUQ==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"streamx": "^2.18.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/base64-js": {
|
||||||
|
"version": "1.5.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||||
|
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://www.patreon.com/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "consulting",
|
||||||
|
"url": "https://feross.org/support"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"node_modules/bl": {
|
||||||
|
"version": "4.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
|
||||||
|
"integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
|
||||||
|
"dependencies": {
|
||||||
|
"buffer": "^5.5.0",
|
||||||
|
"inherits": "^2.0.4",
|
||||||
|
"readable-stream": "^3.4.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/buffer": {
|
||||||
|
"version": "5.7.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
|
||||||
|
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://www.patreon.com/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "consulting",
|
||||||
|
"url": "https://feross.org/support"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"dependencies": {
|
||||||
|
"base64-js": "^1.3.1",
|
||||||
|
"ieee754": "^1.1.13"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/chownr": {
|
||||||
|
"version": "1.1.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
|
||||||
|
"integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="
|
||||||
|
},
|
||||||
|
"node_modules/color": {
|
||||||
|
"version": "4.2.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
|
||||||
|
"integrity": "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==",
|
||||||
|
"dependencies": {
|
||||||
|
"color-convert": "^2.0.1",
|
||||||
|
"color-string": "^1.9.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=12.5.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/color-convert": {
|
||||||
|
"version": "2.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
|
||||||
|
"integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"color-name": "~1.1.4"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=7.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/color-name": {
|
||||||
|
"version": "1.1.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
|
||||||
|
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
|
||||||
|
},
|
||||||
|
"node_modules/color-string": {
|
||||||
|
"version": "1.9.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz",
|
||||||
|
"integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==",
|
||||||
|
"dependencies": {
|
||||||
|
"color-name": "^1.0.0",
|
||||||
|
"simple-swizzle": "^0.2.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/decompress-response": {
|
||||||
|
"version": "6.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
|
||||||
|
"integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"mimic-response": "^3.1.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/deep-extend": {
|
||||||
|
"version": "0.6.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
|
||||||
|
"integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=4.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/detect-libc": {
|
||||||
|
"version": "2.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz",
|
||||||
|
"integrity": "sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/end-of-stream": {
|
||||||
|
"version": "1.4.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz",
|
||||||
|
"integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==",
|
||||||
|
"dependencies": {
|
||||||
|
"once": "^1.4.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/expand-template": {
|
||||||
|
"version": "2.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
|
||||||
|
"integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/fast-fifo": {
|
||||||
|
"version": "1.3.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz",
|
||||||
|
"integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ=="
|
||||||
|
},
|
||||||
|
"node_modules/flatbuffers": {
|
||||||
|
"version": "1.12.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
|
||||||
|
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ=="
|
||||||
|
},
|
||||||
|
"node_modules/fs-constants": {
|
||||||
|
"version": "1.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
|
||||||
|
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="
|
||||||
|
},
|
||||||
|
"node_modules/github-from-package": {
|
||||||
|
"version": "0.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz",
|
||||||
|
"integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw=="
|
||||||
|
},
|
||||||
|
"node_modules/guid-typescript": {
|
||||||
|
"version": "1.0.9",
|
||||||
|
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
|
||||||
|
"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ=="
|
||||||
|
},
|
||||||
|
"node_modules/ieee754": {
|
||||||
|
"version": "1.2.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
|
||||||
|
"integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://www.patreon.com/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "consulting",
|
||||||
|
"url": "https://feross.org/support"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"node_modules/inherits": {
|
||||||
|
"version": "2.0.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
|
||||||
|
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
|
||||||
|
},
|
||||||
|
"node_modules/ini": {
|
||||||
|
"version": "1.3.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
|
||||||
|
"integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew=="
|
||||||
|
},
|
||||||
|
"node_modules/is-arrayish": {
|
||||||
|
"version": "0.3.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
|
||||||
|
"integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ=="
|
||||||
|
},
|
||||||
|
"node_modules/long": {
|
||||||
|
"version": "4.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
|
||||||
|
"integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA=="
|
||||||
|
},
|
||||||
|
"node_modules/mimic-response": {
|
||||||
|
"version": "3.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
|
||||||
|
"integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/minimist": {
|
||||||
|
"version": "1.2.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
|
||||||
|
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/mkdirp-classic": {
|
||||||
|
"version": "0.5.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
|
||||||
|
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A=="
|
||||||
|
},
|
||||||
|
"node_modules/napi-build-utils": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg=="
|
||||||
|
},
|
||||||
|
"node_modules/node-abi": {
|
||||||
|
"version": "3.65.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.65.0.tgz",
|
||||||
|
"integrity": "sha512-ThjYBfoDNr08AWx6hGaRbfPwxKV9kVzAzOzlLKbk2CuqXE2xnCh+cbAGnwM3t8Lq4v9rUB7VfondlkBckcJrVA==",
|
||||||
|
"dependencies": {
|
||||||
|
"semver": "^7.3.5"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/node-addon-api": {
|
||||||
|
"version": "6.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-6.1.0.tgz",
|
||||||
|
"integrity": "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA=="
|
||||||
|
},
|
||||||
|
"node_modules/once": {
|
||||||
|
"version": "1.4.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
|
||||||
|
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
|
||||||
|
"dependencies": {
|
||||||
|
"wrappy": "1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/onnx-proto": {
|
||||||
|
"version": "4.0.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/onnx-proto/-/onnx-proto-4.0.4.tgz",
|
||||||
|
"integrity": "sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA==",
|
||||||
|
"dependencies": {
|
||||||
|
"protobufjs": "^6.8.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/onnxruntime-common": {
|
||||||
|
"version": "1.14.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.14.0.tgz",
|
||||||
|
"integrity": "sha512-3LJpegM2iMNRX2wUmtYfeX/ytfOzNwAWKSq1HbRrKc9+uqG/FsEA0bbKZl1btQeZaXhC26l44NWpNUeXPII7Ew=="
|
||||||
|
},
|
||||||
|
"node_modules/onnxruntime-node": {
|
||||||
|
"version": "1.14.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.14.0.tgz",
|
||||||
|
"integrity": "sha512-5ba7TWomIV/9b6NH/1x/8QEeowsb+jBEvFzU6z0T4mNsFwdPqXeFUM7uxC6QeSRkEbWu3qEB0VMjrvzN/0S9+w==",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"win32",
|
||||||
|
"darwin",
|
||||||
|
"linux"
|
||||||
|
],
|
||||||
|
"dependencies": {
|
||||||
|
"onnxruntime-common": "~1.14.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/onnxruntime-web": {
|
||||||
|
"version": "1.14.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.14.0.tgz",
|
||||||
|
"integrity": "sha512-Kcqf43UMfW8mCydVGcX9OMXI2VN17c0p6XvR7IPSZzBf/6lteBzXHvcEVWDPmCKuGombl997HgLqj91F11DzXw==",
|
||||||
|
"dependencies": {
|
||||||
|
"flatbuffers": "^1.12.0",
|
||||||
|
"guid-typescript": "^1.0.9",
|
||||||
|
"long": "^4.0.0",
|
||||||
|
"onnx-proto": "^4.0.4",
|
||||||
|
"onnxruntime-common": "~1.14.0",
|
||||||
|
"platform": "^1.3.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/platform": {
|
||||||
|
"version": "1.3.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
|
||||||
|
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg=="
|
||||||
|
},
|
||||||
|
"node_modules/prebuild-install": {
|
||||||
|
"version": "7.1.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.2.tgz",
|
||||||
|
"integrity": "sha512-UnNke3IQb6sgarcZIDU3gbMeTp/9SSU1DAIkil7PrqG1vZlBtY5msYccSKSHDqa3hNg436IXK+SNImReuA1wEQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"detect-libc": "^2.0.0",
|
||||||
|
"expand-template": "^2.0.3",
|
||||||
|
"github-from-package": "0.0.0",
|
||||||
|
"minimist": "^1.2.3",
|
||||||
|
"mkdirp-classic": "^0.5.3",
|
||||||
|
"napi-build-utils": "^1.0.1",
|
||||||
|
"node-abi": "^3.3.0",
|
||||||
|
"pump": "^3.0.0",
|
||||||
|
"rc": "^1.2.7",
|
||||||
|
"simple-get": "^4.0.0",
|
||||||
|
"tar-fs": "^2.0.0",
|
||||||
|
"tunnel-agent": "^0.6.0"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"prebuild-install": "bin.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/prebuild-install/node_modules/tar-fs": {
|
||||||
|
"version": "2.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.1.tgz",
|
||||||
|
"integrity": "sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==",
|
||||||
|
"dependencies": {
|
||||||
|
"chownr": "^1.1.1",
|
||||||
|
"mkdirp-classic": "^0.5.2",
|
||||||
|
"pump": "^3.0.0",
|
||||||
|
"tar-stream": "^2.1.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/prebuild-install/node_modules/tar-stream": {
|
||||||
|
"version": "2.2.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz",
|
||||||
|
"integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"bl": "^4.0.3",
|
||||||
|
"end-of-stream": "^1.4.1",
|
||||||
|
"fs-constants": "^1.0.0",
|
||||||
|
"inherits": "^2.0.3",
|
||||||
|
"readable-stream": "^3.1.1"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/protobufjs": {
|
||||||
|
"version": "6.11.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.4.tgz",
|
||||||
|
"integrity": "sha512-5kQWPaJHi1WoCpjTGszzQ32PG2F4+wRY6BmAT4Vfw56Q2FZ4YZzK20xUYQH4YkfehY1e6QSICrJquM6xXZNcrw==",
|
||||||
|
"hasInstallScript": true,
|
||||||
|
"dependencies": {
|
||||||
|
"@protobufjs/aspromise": "^1.1.2",
|
||||||
|
"@protobufjs/base64": "^1.1.2",
|
||||||
|
"@protobufjs/codegen": "^2.0.4",
|
||||||
|
"@protobufjs/eventemitter": "^1.1.0",
|
||||||
|
"@protobufjs/fetch": "^1.1.0",
|
||||||
|
"@protobufjs/float": "^1.0.2",
|
||||||
|
"@protobufjs/inquire": "^1.1.0",
|
||||||
|
"@protobufjs/path": "^1.1.2",
|
||||||
|
"@protobufjs/pool": "^1.1.0",
|
||||||
|
"@protobufjs/utf8": "^1.1.0",
|
||||||
|
"@types/long": "^4.0.1",
|
||||||
|
"@types/node": ">=13.7.0",
|
||||||
|
"long": "^4.0.0"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"pbjs": "bin/pbjs",
|
||||||
|
"pbts": "bin/pbts"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/pump": {
|
||||||
|
"version": "3.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz",
|
||||||
|
"integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==",
|
||||||
|
"dependencies": {
|
||||||
|
"end-of-stream": "^1.1.0",
|
||||||
|
"once": "^1.3.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/queue-tick": {
|
||||||
|
"version": "1.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz",
|
||||||
|
"integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag=="
|
||||||
|
},
|
||||||
|
"node_modules/rc": {
|
||||||
|
"version": "1.2.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz",
|
||||||
|
"integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==",
|
||||||
|
"dependencies": {
|
||||||
|
"deep-extend": "^0.6.0",
|
||||||
|
"ini": "~1.3.0",
|
||||||
|
"minimist": "^1.2.0",
|
||||||
|
"strip-json-comments": "~2.0.1"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"rc": "cli.js"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/readable-stream": {
|
||||||
|
"version": "3.6.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
|
||||||
|
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
|
||||||
|
"dependencies": {
|
||||||
|
"inherits": "^2.0.3",
|
||||||
|
"string_decoder": "^1.1.1",
|
||||||
|
"util-deprecate": "^1.0.1"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/safe-buffer": {
|
||||||
|
"version": "5.2.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
|
||||||
|
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://www.patreon.com/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "consulting",
|
||||||
|
"url": "https://feross.org/support"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"node_modules/semver": {
|
||||||
|
"version": "7.6.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
|
||||||
|
"integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==",
|
||||||
|
"bin": {
|
||||||
|
"semver": "bin/semver.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/sharp": {
|
||||||
|
"version": "0.32.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.32.6.tgz",
|
||||||
|
"integrity": "sha512-KyLTWwgcR9Oe4d9HwCwNM2l7+J0dUQwn/yf7S0EnTtb0eVS4RxO0eUSvxPtzT4F3SY+C4K6fqdv/DO27sJ/v/w==",
|
||||||
|
"hasInstallScript": true,
|
||||||
|
"dependencies": {
|
||||||
|
"color": "^4.2.3",
|
||||||
|
"detect-libc": "^2.0.2",
|
||||||
|
"node-addon-api": "^6.1.0",
|
||||||
|
"prebuild-install": "^7.1.1",
|
||||||
|
"semver": "^7.5.4",
|
||||||
|
"simple-get": "^4.0.1",
|
||||||
|
"tar-fs": "^3.0.4",
|
||||||
|
"tunnel-agent": "^0.6.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=14.15.0"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://opencollective.com/libvips"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/simple-concat": {
|
||||||
|
"version": "1.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz",
|
||||||
|
"integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://www.patreon.com/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "consulting",
|
||||||
|
"url": "https://feross.org/support"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"node_modules/simple-get": {
|
||||||
|
"version": "4.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz",
|
||||||
|
"integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://www.patreon.com/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "consulting",
|
||||||
|
"url": "https://feross.org/support"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"dependencies": {
|
||||||
|
"decompress-response": "^6.0.0",
|
||||||
|
"once": "^1.3.1",
|
||||||
|
"simple-concat": "^1.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/simple-swizzle": {
|
||||||
|
"version": "0.2.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
|
||||||
|
"integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==",
|
||||||
|
"dependencies": {
|
||||||
|
"is-arrayish": "^0.3.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/streamx": {
|
||||||
|
"version": "2.18.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.18.0.tgz",
|
||||||
|
"integrity": "sha512-LLUC1TWdjVdn1weXGcSxyTR3T4+acB6tVGXT95y0nGbca4t4o/ng1wKAGTljm9VicuCVLvRlqFYXYy5GwgM7sQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"fast-fifo": "^1.3.2",
|
||||||
|
"queue-tick": "^1.0.1",
|
||||||
|
"text-decoder": "^1.1.0"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"bare-events": "^2.2.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/string_decoder": {
|
||||||
|
"version": "1.3.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
|
||||||
|
"integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
|
||||||
|
"dependencies": {
|
||||||
|
"safe-buffer": "~5.2.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/strip-json-comments": {
|
||||||
|
"version": "2.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
|
||||||
|
"integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=0.10.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/tar-fs": {
|
||||||
|
"version": "3.0.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.6.tgz",
|
||||||
|
"integrity": "sha512-iokBDQQkUyeXhgPYaZxmczGPhnhXZ0CmrqI+MOb/WFGS9DW5wnfrLgtjUJBvz50vQ3qfRwJ62QVoCFu8mPVu5w==",
|
||||||
|
"dependencies": {
|
||||||
|
"pump": "^3.0.0",
|
||||||
|
"tar-stream": "^3.1.5"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"bare-fs": "^2.1.1",
|
||||||
|
"bare-path": "^2.1.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/tar-stream": {
|
||||||
|
"version": "3.1.7",
|
||||||
|
"resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz",
|
||||||
|
"integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"b4a": "^1.6.4",
|
||||||
|
"fast-fifo": "^1.2.0",
|
||||||
|
"streamx": "^2.15.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/text-decoder": {
|
||||||
|
"version": "1.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.1.1.tgz",
|
||||||
|
"integrity": "sha512-8zll7REEv4GDD3x4/0pW+ppIxSNs7H1J10IKFZsuOMscumCdM2a+toDGLPA3T+1+fLBql4zbt5z83GEQGGV5VA==",
|
||||||
|
"dependencies": {
|
||||||
|
"b4a": "^1.6.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/tunnel-agent": {
|
||||||
|
"version": "0.6.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
|
||||||
|
"integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
|
||||||
|
"dependencies": {
|
||||||
|
"safe-buffer": "^5.0.1"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/typescript": {
|
"node_modules/typescript": {
|
||||||
"version": "5.5.2",
|
"version": "5.5.2",
|
||||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.2.tgz",
|
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.2.tgz",
|
||||||
@@ -74,6 +808,21 @@
|
|||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=14.17"
|
"node": ">=14.17"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"node_modules/undici-types": {
|
||||||
|
"version": "5.26.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||||
|
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="
|
||||||
|
},
|
||||||
|
"node_modules/util-deprecate": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
|
||||||
|
},
|
||||||
|
"node_modules/wrappy": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,8 @@
|
|||||||
"author": "Lance Devs",
|
"author": "Lance Devs",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@lancedb/lancedb": "file:../"
|
"@lancedb/lancedb": "file:../",
|
||||||
|
"@xenova/transformers": "^2.17.2"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"typescript": "^5.0.0"
|
"typescript": "^5.0.0"
|
||||||
|
|||||||
50
nodejs/examples/sentence-transformers.js
Normal file
50
nodejs/examples/sentence-transformers.js
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
import * as lancedb from "@lancedb/lancedb";
|
||||||
|
|
||||||
|
import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
|
||||||
|
import { Utf8 } from "apache-arrow";
|
||||||
|
|
||||||
|
const db = await lancedb.connect("/tmp/db");
|
||||||
|
const func = await getRegistry().get("huggingface").create();
|
||||||
|
|
||||||
|
const facts = [
|
||||||
|
"Albert Einstein was a theoretical physicist.",
|
||||||
|
"The capital of France is Paris.",
|
||||||
|
"The Great Wall of China is one of the Seven Wonders of the World.",
|
||||||
|
"Python is a popular programming language.",
|
||||||
|
"Mount Everest is the highest mountain in the world.",
|
||||||
|
"Leonardo da Vinci painted the Mona Lisa.",
|
||||||
|
"Shakespeare wrote Hamlet.",
|
||||||
|
"The human body has 206 bones.",
|
||||||
|
"The speed of light is approximately 299,792 kilometers per second.",
|
||||||
|
"Water boils at 100 degrees Celsius.",
|
||||||
|
"The Earth orbits the Sun.",
|
||||||
|
"The Pyramids of Giza are located in Egypt.",
|
||||||
|
"Coffee is one of the most popular beverages in the world.",
|
||||||
|
"Tokyo is the capital city of Japan.",
|
||||||
|
"Photosynthesis is the process by which plants make their food.",
|
||||||
|
"The Pacific Ocean is the largest ocean on Earth.",
|
||||||
|
"Mozart was a prolific composer of classical music.",
|
||||||
|
"The Internet is a global network of computers.",
|
||||||
|
"Basketball is a sport played with a ball and a hoop.",
|
||||||
|
"The first computer virus was created in 1983.",
|
||||||
|
"Artificial neural networks are inspired by the human brain.",
|
||||||
|
"Deep learning is a subset of machine learning.",
|
||||||
|
"IBM's Watson won Jeopardy! in 2011.",
|
||||||
|
"The first computer programmer was Ada Lovelace.",
|
||||||
|
"The first chatbot was ELIZA, created in the 1960s.",
|
||||||
|
].map((text) => ({ text }));
|
||||||
|
|
||||||
|
const factsSchema = LanceSchema({
|
||||||
|
text: func.sourceField(new Utf8()),
|
||||||
|
vector: func.vectorField(),
|
||||||
|
});
|
||||||
|
|
||||||
|
const tbl = await db.createTable("facts", facts, {
|
||||||
|
mode: "overwrite",
|
||||||
|
schema: factsSchema,
|
||||||
|
});
|
||||||
|
|
||||||
|
const query = "How many bones are in the human body?";
|
||||||
|
const actual = await tbl.search(query).limit(1).toArray();
|
||||||
|
|
||||||
|
console.log("Answer: ", actual[0]["text"]);
|
||||||
@@ -578,7 +578,7 @@ async function applyEmbeddingsFromMetadata(
|
|||||||
schema: Schema,
|
schema: Schema,
|
||||||
): Promise<ArrowTable> {
|
): Promise<ArrowTable> {
|
||||||
const registry = getRegistry();
|
const registry = getRegistry();
|
||||||
const functions = registry.parseFunctions(schema.metadata);
|
const functions = await registry.parseFunctions(schema.metadata);
|
||||||
|
|
||||||
const columns = Object.fromEntries(
|
const columns = Object.fromEntries(
|
||||||
table.schema.fields.map((field) => [
|
table.schema.fields.map((field) => [
|
||||||
|
|||||||
@@ -240,6 +240,7 @@ export class LocalConnection extends Connection {
|
|||||||
): Promise<Table> {
|
): Promise<Table> {
|
||||||
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
||||||
const { name, data, ...options } = nameOrOptions;
|
const { name, data, ...options } = nameOrOptions;
|
||||||
|
|
||||||
return this.createTable(name, data, options);
|
return this.createTable(name, data, options);
|
||||||
}
|
}
|
||||||
if (data === undefined) {
|
if (data === undefined) {
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ export interface EmbeddingFunctionConstructor<
|
|||||||
> {
|
> {
|
||||||
new (modelOptions?: T["TOptions"]): T;
|
new (modelOptions?: T["TOptions"]): T;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An embedding function that automatically creates vector representation for a given column.
|
* An embedding function that automatically creates vector representation for a given column.
|
||||||
*/
|
*/
|
||||||
@@ -82,6 +83,8 @@ export abstract class EmbeddingFunction<
|
|||||||
*/
|
*/
|
||||||
abstract toJSON(): Partial<M>;
|
abstract toJSON(): Partial<M>;
|
||||||
|
|
||||||
|
async init?(): Promise<void>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* sourceField is used in combination with `LanceSchema` to provide a declarative data model
|
* sourceField is used in combination with `LanceSchema` to provide a declarative data model
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
import { DataType, Field, Schema } from "../arrow";
|
import { Field, Schema } from "../arrow";
|
||||||
import { isDataType } from "../arrow";
|
import { isDataType } from "../arrow";
|
||||||
import { sanitizeType } from "../sanitize";
|
import { sanitizeType } from "../sanitize";
|
||||||
import { EmbeddingFunction } from "./embedding_function";
|
import { EmbeddingFunction } from "./embedding_function";
|
||||||
@@ -22,6 +22,7 @@ export { EmbeddingFunction } from "./embedding_function";
|
|||||||
|
|
||||||
// We need to explicitly export '*' so that the `register` decorator actually registers the class.
|
// We need to explicitly export '*' so that the `register` decorator actually registers the class.
|
||||||
export * from "./openai";
|
export * from "./openai";
|
||||||
|
export * from "./transformers";
|
||||||
export * from "./registry";
|
export * from "./registry";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -18,9 +18,14 @@ import {
|
|||||||
} from "./embedding_function";
|
} from "./embedding_function";
|
||||||
import "reflect-metadata";
|
import "reflect-metadata";
|
||||||
import { OpenAIEmbeddingFunction } from "./openai";
|
import { OpenAIEmbeddingFunction } from "./openai";
|
||||||
|
import { TransformersEmbeddingFunction } from "./transformers";
|
||||||
|
|
||||||
|
type CreateReturnType<T> = T extends { init: () => Promise<void> }
|
||||||
|
? Promise<T>
|
||||||
|
: T;
|
||||||
|
|
||||||
interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
||||||
create(options?: T["TOptions"]): T;
|
create(options?: T["TOptions"]): CreateReturnType<T>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -61,38 +66,43 @@ export class EmbeddingFunctionRegistry {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
get(name: "openai"): EmbeddingFunctionCreate<OpenAIEmbeddingFunction>;
|
||||||
|
get(
|
||||||
|
name: "huggingface",
|
||||||
|
): EmbeddingFunctionCreate<TransformersEmbeddingFunction>;
|
||||||
|
get<T extends EmbeddingFunction<unknown>>(
|
||||||
|
name: string,
|
||||||
|
): EmbeddingFunctionCreate<T> | undefined;
|
||||||
/**
|
/**
|
||||||
* Fetch an embedding function by name
|
* Fetch an embedding function by name
|
||||||
* @param name The name of the function
|
* @param name The name of the function
|
||||||
*/
|
*/
|
||||||
get<T extends EmbeddingFunction<unknown>, Name extends string = "">(
|
get(name: string) {
|
||||||
name: Name extends "openai" ? "openai" : string,
|
|
||||||
//This makes it so that you can use string constants as "types", or use an explicitly supplied type
|
|
||||||
// ex:
|
|
||||||
// `registry.get("openai") -> EmbeddingFunctionCreate<OpenAIEmbeddingFunction>`
|
|
||||||
// `registry.get<MyCustomEmbeddingFunction>("my_func") -> EmbeddingFunctionCreate<MyCustomEmbeddingFunction> | undefined`
|
|
||||||
//
|
|
||||||
// the reason this is important is that we always know our built in functions are defined so the user isnt forced to do a non null/undefined
|
|
||||||
// ```ts
|
|
||||||
// const openai: OpenAIEmbeddingFunction = registry.get("openai").create()
|
|
||||||
// ```
|
|
||||||
): Name extends "openai"
|
|
||||||
? EmbeddingFunctionCreate<OpenAIEmbeddingFunction>
|
|
||||||
: EmbeddingFunctionCreate<T> | undefined {
|
|
||||||
type Output = Name extends "openai"
|
|
||||||
? EmbeddingFunctionCreate<OpenAIEmbeddingFunction>
|
|
||||||
: EmbeddingFunctionCreate<T> | undefined;
|
|
||||||
|
|
||||||
const factory = this.#functions.get(name);
|
const factory = this.#functions.get(name);
|
||||||
if (!factory) {
|
if (!factory) {
|
||||||
return undefined as Output;
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||||
|
return undefined as any;
|
||||||
|
}
|
||||||
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||||
|
let create: any;
|
||||||
|
if (factory.prototype.init) {
|
||||||
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||||
|
create = async function (options?: any) {
|
||||||
|
const instance = new factory(options);
|
||||||
|
await instance.init!();
|
||||||
|
return instance;
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||||
|
create = function (options?: any) {
|
||||||
|
const instance = new factory(options);
|
||||||
|
return instance;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
create: function (options?: T["TOptions"]) {
|
create,
|
||||||
return new factory(options);
|
};
|
||||||
},
|
|
||||||
} as Output;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -105,10 +115,10 @@ export class EmbeddingFunctionRegistry {
|
|||||||
/**
|
/**
|
||||||
* @ignore
|
* @ignore
|
||||||
*/
|
*/
|
||||||
parseFunctions(
|
async parseFunctions(
|
||||||
this: EmbeddingFunctionRegistry,
|
this: EmbeddingFunctionRegistry,
|
||||||
metadata: Map<string, string>,
|
metadata: Map<string, string>,
|
||||||
): Map<string, EmbeddingFunctionConfig> {
|
): Promise<Map<string, EmbeddingFunctionConfig>> {
|
||||||
if (!metadata.has("embedding_functions")) {
|
if (!metadata.has("embedding_functions")) {
|
||||||
return new Map();
|
return new Map();
|
||||||
} else {
|
} else {
|
||||||
@@ -118,25 +128,30 @@ export class EmbeddingFunctionRegistry {
|
|||||||
vectorColumn: string;
|
vectorColumn: string;
|
||||||
model: EmbeddingFunction["TOptions"];
|
model: EmbeddingFunction["TOptions"];
|
||||||
};
|
};
|
||||||
|
|
||||||
const functions = <FunctionConfig[]>(
|
const functions = <FunctionConfig[]>(
|
||||||
JSON.parse(metadata.get("embedding_functions")!)
|
JSON.parse(metadata.get("embedding_functions")!)
|
||||||
);
|
);
|
||||||
return new Map(
|
|
||||||
functions.map((f) => {
|
const items: [string, EmbeddingFunctionConfig][] = await Promise.all(
|
||||||
|
functions.map(async (f) => {
|
||||||
const fn = this.get(f.name);
|
const fn = this.get(f.name);
|
||||||
if (!fn) {
|
if (!fn) {
|
||||||
throw new Error(`Function "${f.name}" not found in registry`);
|
throw new Error(`Function "${f.name}" not found in registry`);
|
||||||
}
|
}
|
||||||
|
const func = await this.get(f.name)!.create(f.model);
|
||||||
return [
|
return [
|
||||||
f.name,
|
f.name,
|
||||||
{
|
{
|
||||||
sourceColumn: f.sourceColumn,
|
sourceColumn: f.sourceColumn,
|
||||||
vectorColumn: f.vectorColumn,
|
vectorColumn: f.vectorColumn,
|
||||||
function: this.get(f.name)!.create(f.model),
|
function: func,
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
return new Map(items);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||||
|
|||||||
193
nodejs/lancedb/embedding/transformers.ts
Normal file
193
nodejs/lancedb/embedding/transformers.ts
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
// Copyright 2023 Lance Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
import { Float, Float32 } from "../arrow";
|
||||||
|
import { EmbeddingFunction } from "./embedding_function";
|
||||||
|
import { register } from "./registry";
|
||||||
|
|
||||||
|
export type XenovaTransformerOptions = {
|
||||||
|
/** The wasm compatible model to use */
|
||||||
|
model: string;
|
||||||
|
/**
|
||||||
|
* The wasm compatible tokenizer to use
|
||||||
|
* If not provided, it will use the default tokenizer for the model
|
||||||
|
*/
|
||||||
|
tokenizer?: string;
|
||||||
|
/**
|
||||||
|
* The number of dimensions of the embeddings
|
||||||
|
*
|
||||||
|
* We will attempt to infer this from the model config if not provided.
|
||||||
|
* Since there isn't a standard way to get this information from the model,
|
||||||
|
* you may need to manually specify this if using a model that doesn't have a 'hidden_size' in the config.
|
||||||
|
* */
|
||||||
|
ndims?: number;
|
||||||
|
/** Options for the tokenizer */
|
||||||
|
tokenizerOptions?: {
|
||||||
|
textPair?: string | string[];
|
||||||
|
padding?: boolean | "max_length";
|
||||||
|
addSpecialTokens?: boolean;
|
||||||
|
truncation?: boolean;
|
||||||
|
maxLength?: number;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Embedding function that runs a Hugging Face model locally through
 * `@xenova/transformers`.
 *
 * It is registered under the name "huggingface". The model and tokenizer are
 * loaded by {@link TransformersEmbeddingFunction.init}; until that has
 * completed, the embedding methods reject.
 *
 * Embeddings are obtained by taking the first tensor in the model output,
 * summing it along dimension 1 and dividing by the length of that dimension.
 * No attention mask is applied in this step, so every position returned by
 * the tokenizer contributes to the average.
 */
@register("huggingface")
export class TransformersEmbeddingFunction extends EmbeddingFunction<
  string,
  Partial<XenovaTransformerOptions>
> {
  #model?: import("@xenova/transformers").PreTrainedModel;
  #tokenizer?: import("@xenova/transformers").PreTrainedTokenizer;
  #modelName: XenovaTransformerOptions["model"];
  // Set to true only after both the model and the tokenizer were loaded.
  #initialized = false;
  #tokenizerOptions: XenovaTransformerOptions["tokenizerOptions"];
  // Optional user-supplied embedding size; takes precedence over the model config.
  #ndims?: number;

  /**
   * @param options - Partial configuration. `model` defaults to
   *   "Xenova/all-MiniLM-L6-v2"; `tokenizerOptions` are merged over the
   *   default `{ padding: true }`; `ndims` optionally overrides the embedding
   *   size reported by {@link TransformersEmbeddingFunction.ndims}.
   */
  constructor(
    options: Partial<XenovaTransformerOptions> = {
      model: "Xenova/all-MiniLM-L6-v2",
    },
  ) {
    super();

    const modelName = options?.model ?? "Xenova/all-MiniLM-L6-v2";
    // Caller-provided tokenizer options win over the `padding: true` default.
    this.#tokenizerOptions = {
      padding: true,
      ...options.tokenizerOptions,
    };

    this.#ndims = options.ndims;
    this.#modelName = modelName;
  }

  /**
   * Serialize the configuration of this function.
   *
   * @returns An object holding `model`, plus `ndims`, `tokenizerOptions` and
   *   `tokenizer` (the loaded tokenizer's name) when those are available.
   */
  toJSON() {
    // biome-ignore lint/suspicious/noExplicitAny: <explanation>
    const obj: Record<string, any> = {
      model: this.#modelName,
    };
    if (this.#ndims) {
      obj["ndims"] = this.#ndims;
    }
    if (this.#tokenizerOptions) {
      obj["tokenizerOptions"] = this.#tokenizerOptions;
    }
    if (this.#tokenizer) {
      obj["tokenizer"] = this.#tokenizer.name;
    }
    return obj;
  }

  /**
   * Load `@xenova/transformers`, then the model and the tokenizer named by
   * the configured model name, and mark the function as initialized.
   *
   * @throws Error if the library, the model or the tokenizer cannot be
   *   loaded; the underlying reason is appended to the message.
   */
  async init() {
    let transformers;
    try {
      // SAFETY:
      // since typescript transpiles `import` to `require`, we need to do this in an unsafe way
      // We can't use `require` because `@xenova/transformers` is an ESM module
      // and we can't use `import` directly because typescript will transpile it to `require`.
      // and we want to remain compatible with both ESM and CJS modules
      // so we use `eval` to bypass typescript for this specific import.
      // The evaluated string is a fixed literal, never user input.
      transformers = await eval('import("@xenova/transformers")');
    } catch (e) {
      throw new Error(`error loading @xenova/transformers\nReason: ${e}`);
    }

    try {
      this.#model = await transformers.AutoModel.from_pretrained(
        this.#modelName,
      );
    } catch (e) {
      throw new Error(
        `error loading model ${this.#modelName}. Make sure you are using a wasm compatible model.\nReason: ${e}`,
      );
    }
    try {
      this.#tokenizer = await transformers.AutoTokenizer.from_pretrained(
        this.#modelName,
      );
    } catch (e) {
      throw new Error(
        `error loading tokenizer for ${this.#modelName}. Make sure you are using a wasm compatible model:\nReason: ${e}`,
      );
    }
    this.#initialized = true;
  }

  /**
   * Number of dimensions of the produced embeddings.
   *
   * @returns The `ndims` option when it was provided, otherwise the
   *   `hidden_size` entry of the loaded model's config.
   * @throws Error if no `ndims` was provided and the model is not loaded
   *   yet, or if the model config has no `hidden_size`.
   */
  ndims(): number {
    if (this.#ndims) {
      return this.#ndims;
    }

    // Without an explicit size we depend on the model config, which only
    // exists after init(). Fail with an actionable message instead of an
    // opaque "cannot read properties of undefined".
    const model = this.#model;
    if (!model) {
      throw new Error(
        "embedding dimensions are unknown: the model is not loaded. Please call init() or specify `ndims` explicitly.",
      );
    }

    const ndims = model.config["hidden_size"];
    if (!ndims) {
      throw new Error(
        "hidden_size not found in model config, you may need to manually specify the embedding dimensions. ",
      );
    }
    return ndims;
  }

  /** @returns The Arrow data type of a single embedding value (`Float32`). */
  embeddingDataType(): Float {
    return new Float32();
  }

  /**
   * Compute one embedding per input string.
   *
   * @param data - The texts to embed.
   * @returns One `number[]` of length `ndims()` per input text.
   */
  async computeSourceEmbeddings(data: string[]): Promise<number[][]> {
    // this should only happen if the user is trying to use the function directly.
    // Anything going through the registry should already be initialized.
    if (!this.#initialized) {
      return Promise.reject(
        new Error(
          "something went wrong: embedding function not initialized. Please call init()",
        ),
      );
    }
    const tokenizer = this.#tokenizer!;
    const model = this.#model!;

    const inputs = await tokenizer(data, this.#tokenizerOptions);
    let tokens = await model.forward(inputs);
    // Use the first tensor of the model output.
    tokens = tokens[Object.keys(tokens)[0]];

    const [nItems, nTokens] = tokens.dims;

    // Average along dimension 1 (sum, then divide by its length).
    tokens = tensorDiv(tokens.sum(1), nTokens);

    // TODO: support other data types
    const tokenData = tokens.data;
    // The pooled data is flat; each item occupies `stride` consecutive values.
    const stride = this.ndims();

    const embeddings = [];
    for (let i = 0; i < nItems; i++) {
      const start = i * stride;
      const end = start + stride;
      const slice = tokenData.slice(start, end);
      embeddings.push(Array.from(slice) as number[]); // TODO: Avoid copy here
    }
    return embeddings;
  }

  /**
   * Compute the embedding of a single query string.
   *
   * @param data - The query text.
   * @returns The embedding computed by `computeSourceEmbeddings` for that text.
   */
  async computeQueryEmbeddings(data: string): Promise<number[]> {
    return (await this.computeSourceEmbeddings([data]))[0];
  }
}
|
||||||
|
|
||||||
|
/**
 * Divide every element of a tensor's data by a scalar, in place.
 *
 * @param src - The tensor whose `data` is modified.
 * @param divBy - The divisor applied to each element.
 * @returns The same tensor instance that was passed in.
 */
const tensorDiv = (
  src: import("@xenova/transformers").Tensor,
  divBy: number,
) => {
  const values = src.data;
  const count = values.length;
  for (let idx = 0; idx < count; idx++) {
    values[idx] = values[idx] / divBy;
  }
  return src;
};
|
||||||
@@ -27,8 +27,7 @@ export class RestfulLanceDBClient {
|
|||||||
#apiKey: string;
|
#apiKey: string;
|
||||||
#hostOverride?: string;
|
#hostOverride?: string;
|
||||||
#closed: boolean = false;
|
#closed: boolean = false;
|
||||||
#connectionTimeout: number = 12 * 1000; // 12 seconds;
|
#timeout: number = 12 * 1000; // 12 seconds;
|
||||||
#readTimeout: number = 30 * 1000; // 30 seconds;
|
|
||||||
#session?: import("axios").AxiosInstance;
|
#session?: import("axios").AxiosInstance;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
@@ -36,15 +35,13 @@ export class RestfulLanceDBClient {
|
|||||||
apiKey: string,
|
apiKey: string,
|
||||||
region: string,
|
region: string,
|
||||||
hostOverride?: string,
|
hostOverride?: string,
|
||||||
connectionTimeout?: number,
|
timeout?: number,
|
||||||
readTimeout?: number,
|
|
||||||
) {
|
) {
|
||||||
this.#dbName = dbName;
|
this.#dbName = dbName;
|
||||||
this.#apiKey = apiKey;
|
this.#apiKey = apiKey;
|
||||||
this.#region = region;
|
this.#region = region;
|
||||||
this.#hostOverride = hostOverride ?? this.#hostOverride;
|
this.#hostOverride = hostOverride ?? this.#hostOverride;
|
||||||
this.#connectionTimeout = connectionTimeout ?? this.#connectionTimeout;
|
this.#timeout = timeout ?? this.#timeout;
|
||||||
this.#readTimeout = readTimeout ?? this.#readTimeout;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo: cache the session.
|
// todo: cache the session.
|
||||||
@@ -59,7 +56,7 @@ export class RestfulLanceDBClient {
|
|||||||
Authorization: `Bearer ${this.#apiKey}`,
|
Authorization: `Bearer ${this.#apiKey}`,
|
||||||
},
|
},
|
||||||
transformResponse: decodeErrorData,
|
transformResponse: decodeErrorData,
|
||||||
timeout: this.#connectionTimeout,
|
timeout: this.#timeout,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -111,7 +108,7 @@ export class RestfulLanceDBClient {
|
|||||||
params,
|
params,
|
||||||
});
|
});
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
if (e instanceof AxiosError) {
|
if (e instanceof AxiosError && e.response) {
|
||||||
response = e.response;
|
response = e.response;
|
||||||
} else {
|
} else {
|
||||||
throw e;
|
throw e;
|
||||||
@@ -165,7 +162,7 @@ export class RestfulLanceDBClient {
|
|||||||
params: new Map(Object.entries(additional.params ?? {})),
|
params: new Map(Object.entries(additional.params ?? {})),
|
||||||
});
|
});
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
if (e instanceof AxiosError) {
|
if (e instanceof AxiosError && e.response) {
|
||||||
response = e.response;
|
response = e.response;
|
||||||
} else {
|
} else {
|
||||||
throw e;
|
throw e;
|
||||||
|
|||||||
@@ -20,8 +20,7 @@ export interface RemoteConnectionOptions {
|
|||||||
apiKey?: string;
|
apiKey?: string;
|
||||||
region?: string;
|
region?: string;
|
||||||
hostOverride?: string;
|
hostOverride?: string;
|
||||||
connectionTimeout?: number;
|
timeout?: number;
|
||||||
readTimeout?: number;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export class RemoteConnection extends Connection {
|
export class RemoteConnection extends Connection {
|
||||||
@@ -33,13 +32,7 @@ export class RemoteConnection extends Connection {
|
|||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
url: string,
|
url: string,
|
||||||
{
|
{ apiKey, region, hostOverride, timeout }: RemoteConnectionOptions,
|
||||||
apiKey,
|
|
||||||
region,
|
|
||||||
hostOverride,
|
|
||||||
connectionTimeout,
|
|
||||||
readTimeout,
|
|
||||||
}: RemoteConnectionOptions,
|
|
||||||
) {
|
) {
|
||||||
super();
|
super();
|
||||||
apiKey = apiKey ?? process.env.LANCEDB_API_KEY;
|
apiKey = apiKey ?? process.env.LANCEDB_API_KEY;
|
||||||
@@ -68,8 +61,7 @@ export class RemoteConnection extends Connection {
|
|||||||
this.#apiKey,
|
this.#apiKey,
|
||||||
this.#region,
|
this.#region,
|
||||||
hostOverride,
|
hostOverride,
|
||||||
connectionTimeout,
|
timeout,
|
||||||
readTimeout,
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -275,12 +275,15 @@ export abstract class Table {
|
|||||||
* of the given query vector
|
* of the given query vector
|
||||||
* @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
|
* @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
|
||||||
* @note If no embedding functions are defined in the table, this will error when collecting the results.
|
* @note If no embedding functions are defined in the table, this will error when collecting the results.
|
||||||
|
*
|
||||||
|
* This is just a convenience method for calling `.query().nearestTo(await myEmbeddingFunction(query))`
|
||||||
*/
|
*/
|
||||||
abstract search(query: string): VectorQuery;
|
abstract search(query: string): VectorQuery;
|
||||||
/**
|
/**
|
||||||
* Create a search query to find the nearest neighbors
|
* Create a search query to find the nearest neighbors
|
||||||
* of the given query vector
|
* of the given query vector
|
||||||
* @param {IntoVector} query - the query vector
|
* @param {IntoVector} query - the query vector
|
||||||
|
* This is just a convenience method for calling `.query().nearestTo(query)`
|
||||||
*/
|
*/
|
||||||
abstract search(query: IntoVector): VectorQuery;
|
abstract search(query: IntoVector): VectorQuery;
|
||||||
/**
|
/**
|
||||||
@@ -490,7 +493,7 @@ export class LocalTable extends Table {
|
|||||||
const mode = options?.mode ?? "append";
|
const mode = options?.mode ?? "append";
|
||||||
const schema = await this.schema();
|
const schema = await this.schema();
|
||||||
const registry = getRegistry();
|
const registry = getRegistry();
|
||||||
const functions = registry.parseFunctions(schema.metadata);
|
const functions = await registry.parseFunctions(schema.metadata);
|
||||||
|
|
||||||
const buffer = await fromDataToBuffer(
|
const buffer = await fromDataToBuffer(
|
||||||
data,
|
data,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.7.1",
|
"version": "0.7.2",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.7.1",
|
"version": "0.7.2",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.7.1",
|
"version": "0.7.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.7.1",
|
"version": "0.7.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.7.1",
|
"version": "0.7.2",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
762
nodejs/package-lock.json
generated
762
nodejs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -10,7 +10,7 @@
|
|||||||
"vector database",
|
"vector database",
|
||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"version": "0.7.1",
|
"version": "0.7.2",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
@@ -32,12 +32,13 @@
|
|||||||
},
|
},
|
||||||
"license": "Apache 2.0",
|
"license": "Apache 2.0",
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@aws-sdk/client-dynamodb": "^3.33.0",
|
||||||
"@aws-sdk/client-kms": "^3.33.0",
|
"@aws-sdk/client-kms": "^3.33.0",
|
||||||
"@aws-sdk/client-s3": "^3.33.0",
|
"@aws-sdk/client-s3": "^3.33.0",
|
||||||
"@aws-sdk/client-dynamodb": "^3.33.0",
|
|
||||||
"@biomejs/biome": "^1.7.3",
|
"@biomejs/biome": "^1.7.3",
|
||||||
"@jest/globals": "^29.7.0",
|
"@jest/globals": "^29.7.0",
|
||||||
"@napi-rs/cli": "^2.18.3",
|
"@napi-rs/cli": "^2.18.3",
|
||||||
|
"@types/axios": "^0.14.0",
|
||||||
"@types/jest": "^29.1.2",
|
"@types/jest": "^29.1.2",
|
||||||
"@types/tmp": "^0.2.6",
|
"@types/tmp": "^0.2.6",
|
||||||
"apache-arrow-13": "npm:apache-arrow@13.0.0",
|
"apache-arrow-13": "npm:apache-arrow@13.0.0",
|
||||||
@@ -53,8 +54,7 @@
|
|||||||
"typedoc": "^0.26.4",
|
"typedoc": "^0.26.4",
|
||||||
"typedoc-plugin-markdown": "^4.2.1",
|
"typedoc-plugin-markdown": "^4.2.1",
|
||||||
"typescript": "^5.3.3",
|
"typescript": "^5.3.3",
|
||||||
"typescript-eslint": "^7.1.0",
|
"typescript-eslint": "^7.1.0"
|
||||||
"@types/axios": "^0.14.0"
|
|
||||||
},
|
},
|
||||||
"ava": {
|
"ava": {
|
||||||
"timeout": "3m"
|
"timeout": "3m"
|
||||||
@@ -85,6 +85,7 @@
|
|||||||
"reflect-metadata": "^0.2.2"
|
"reflect-metadata": "^0.2.2"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
|
"@xenova/transformers": ">=2.17 < 3",
|
||||||
"openai": "^4.29.2"
|
"openai": "^4.29.2"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.10.2"
|
current_version = "0.11.0"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.10.2"
|
version = "0.11.0"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
@@ -14,11 +14,13 @@ name = "_lancedb"
|
|||||||
crate-type = ["cdylib"]
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
arrow = { version = "51.0.0", features = ["pyarrow"] }
|
arrow = { version = "52.1", features = ["pyarrow"] }
|
||||||
lancedb = { path = "../rust/lancedb" }
|
lancedb = { path = "../rust/lancedb" }
|
||||||
env_logger = "0.10"
|
env_logger = "0.10"
|
||||||
pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] }
|
pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38", "gil-refs"] }
|
||||||
pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
|
# Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
|
||||||
|
# pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
|
||||||
|
pyo3-asyncio-0-21 = { version = "0.21.0", features = ["attributes", "tokio-runtime"] }
|
||||||
|
|
||||||
# Prevent dynamic linking of lzma, which comes from datafusion
|
# Prevent dynamic linking of lzma, which comes from datafusion
|
||||||
lzma-sys = { version = "*", features = ["static"] }
|
lzma-sys = { version = "*", features = ["static"] }
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
|||||||
# version in Cargo.toml
|
# version in Cargo.toml
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"pylance==0.14.1",
|
"pylance==0.15.0",
|
||||||
"ratelimiter~=1.0",
|
"ratelimiter~=1.0",
|
||||||
"requests>=2.31.0",
|
"requests>=2.31.0",
|
||||||
"retry>=0.9.2",
|
"retry>=0.9.2",
|
||||||
|
|||||||
@@ -732,7 +732,7 @@ class AsyncConnection(object):
|
|||||||
fill_value = 0.0
|
fill_value = 0.0
|
||||||
|
|
||||||
if data is not None:
|
if data is not None:
|
||||||
data = _sanitize_data(
|
data, schema = _sanitize_data(
|
||||||
data,
|
data,
|
||||||
schema,
|
schema,
|
||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
|
|||||||
@@ -428,9 +428,9 @@ class LanceQueryBuilder(ABC):
|
|||||||
>>> query = [100, 100]
|
>>> query = [100, 100]
|
||||||
>>> plan = table.search(query).explain_plan(True)
|
>>> plan = table.search(query).explain_plan(True)
|
||||||
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||||
Projection: fields=[vector, _distance]
|
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||||
FilterExec: _distance@2 IS NOT NULL
|
FilterExec: _distance@2 IS NOT NULL
|
||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST]
|
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
||||||
KNNVectorDistance: metric=l2
|
KNNVectorDistance: metric=l2
|
||||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
||||||
|
|
||||||
@@ -1214,9 +1214,9 @@ class AsyncQueryBase(object):
|
|||||||
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
|
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
|
||||||
... print(plan)
|
... print(plan)
|
||||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||||
Projection: fields=[vector, _distance]
|
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||||
FilterExec: _distance@2 IS NOT NULL
|
FilterExec: _distance@2 IS NOT NULL
|
||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST]
|
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
||||||
KNNVectorDistance: metric=l2
|
KNNVectorDistance: metric=l2
|
||||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
||||||
|
|
||||||
|
|||||||
@@ -245,7 +245,7 @@ class RemoteDBConnection(DBConnection):
|
|||||||
schema = schema.to_arrow_schema()
|
schema = schema.to_arrow_schema()
|
||||||
|
|
||||||
if data is not None:
|
if data is not None:
|
||||||
data = _sanitize_data(
|
data, schema = _sanitize_data(
|
||||||
data,
|
data,
|
||||||
schema,
|
schema,
|
||||||
metadata=None,
|
metadata=None,
|
||||||
|
|||||||
@@ -210,7 +210,7 @@ class RemoteTable(Table):
|
|||||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||||
|
|
||||||
"""
|
"""
|
||||||
data = _sanitize_data(
|
data, _ = _sanitize_data(
|
||||||
data,
|
data,
|
||||||
self.schema,
|
self.schema,
|
||||||
metadata=None,
|
metadata=None,
|
||||||
@@ -345,7 +345,7 @@ class RemoteTable(Table):
|
|||||||
on_bad_vectors: str,
|
on_bad_vectors: str,
|
||||||
fill_value: float,
|
fill_value: float,
|
||||||
):
|
):
|
||||||
data = _sanitize_data(
|
data, _ = _sanitize_data(
|
||||||
new_data,
|
new_data,
|
||||||
self.schema,
|
self.schema,
|
||||||
metadata=None,
|
metadata=None,
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from .cross_encoder import CrossEncoderReranker
|
|||||||
from .linear_combination import LinearCombinationReranker
|
from .linear_combination import LinearCombinationReranker
|
||||||
from .openai import OpenaiReranker
|
from .openai import OpenaiReranker
|
||||||
from .jinaai import JinaReranker
|
from .jinaai import JinaReranker
|
||||||
|
from .rrf import RRFReranker
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"Reranker",
|
"Reranker",
|
||||||
@@ -14,4 +15,5 @@ __all__ = [
|
|||||||
"OpenaiReranker",
|
"OpenaiReranker",
|
||||||
"ColbertReranker",
|
"ColbertReranker",
|
||||||
"JinaReranker",
|
"JinaReranker",
|
||||||
|
"RRFReranker",
|
||||||
]
|
]
|
||||||
|
|||||||
60
python/python/lancedb/rerankers/rrf.py
Normal file
60
python/python/lancedb/rerankers/rrf.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
from .base import Reranker
|
||||||
|
|
||||||
|
|
||||||
|
class RRFReranker(Reranker):
    """
    Reranks hybrid search results with the Reciprocal Rank Fusion (RRF)
    algorithm.

    Every row receives ``1 / (rank + K)`` for each result list (vector search,
    FTS) it appears in, where ``rank`` is its 1-based position in that list.
    The sum of these contributions is the row's relevance score.

    Parameters
    ----------
    K : int, default 60
        A constant used in the RRF formula (default is 60). Experiments
        indicate that k = 60 was near-optimal, but that the choice is
        not critical. See paper:
        https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf
    return_score : str, default "relevance"
        Options are "relevance" or "all".
        The type of score to return. If "relevance", will return only the relevance
        score. If "all", will return all scores from the vector and FTS search along
        with the relevance score.
    """

    def __init__(self, K: int = 60, return_score="relevance"):
        # Reject non-positive constants before touching the base class.
        if K <= 0:
            raise ValueError("K must be greater than 0")
        super().__init__(return_score)
        self.K = K

    def rerank_hybrid(
        self,
        query: str,  # noqa: F821
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
        """
        Merge vector and FTS results and order them by their RRF score.

        Parameters
        ----------
        query : str
            The search query. It is not used by this reranker.
        vector_results : pa.Table
            Vector search results; read through its ``_rowid`` column.
        fts_results : pa.Table
            Full-text search results; read through its ``_rowid`` column.

        Returns
        -------
        pa.Table
            The merged results with a float32 ``_relevance_score`` column,
            sorted by that column in descending order. When the configured
            score type is "relevance", the ``score`` and ``_distance``
            columns are dropped.
        """
        # Row ids in the order each search returned them; a falsy table
        # contributes no ranks.
        rankings = (
            vector_results["_rowid"].to_pylist() if vector_results else [],
            fts_results["_rowid"].to_pylist() if fts_results else [],
        )

        # Accumulate the reciprocal-rank contribution of every list.
        fused_scores = defaultdict(float)
        for ranking in rankings:
            for rank, row_id in enumerate(ranking, start=1):
                fused_scores[row_id] += 1 / (rank + self.K)

        # Attach the fused score to the merged rows, then order by it.
        merged = self.merge_results(vector_results, fts_results)
        scores = [fused_scores[row_id] for row_id in merged["_rowid"].to_pylist()]
        score_column = pa.array(scores, type=pa.float32())
        ranked = merged.append_column("_relevance_score", score_column).sort_by(
            [("_relevance_score", "descending")]
        )

        if self.score != "relevance":
            return ranked
        return ranked.drop_columns(["score", "_distance"])
|
||||||
@@ -103,7 +103,8 @@ def _sanitize_data(
|
|||||||
if isinstance(data, list):
|
if isinstance(data, list):
|
||||||
# convert to list of dict if data is a bunch of LanceModels
|
# convert to list of dict if data is a bunch of LanceModels
|
||||||
if isinstance(data[0], LanceModel):
|
if isinstance(data[0], LanceModel):
|
||||||
schema = data[0].__class__.to_arrow_schema()
|
if schema is None:
|
||||||
|
schema = data[0].__class__.to_arrow_schema()
|
||||||
data = [model_to_dict(d) for d in data]
|
data = [model_to_dict(d) for d in data]
|
||||||
data = pa.Table.from_pylist(data, schema=schema)
|
data = pa.Table.from_pylist(data, schema=schema)
|
||||||
else:
|
else:
|
||||||
@@ -133,7 +134,7 @@ def _sanitize_data(
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise TypeError(f"Unsupported data type: {type(data)}")
|
raise TypeError(f"Unsupported data type: {type(data)}")
|
||||||
return data
|
return data, schema
|
||||||
|
|
||||||
|
|
||||||
def _schema_from_hf(data, schema):
|
def _schema_from_hf(data, schema):
|
||||||
@@ -205,7 +206,7 @@ def _to_record_batch_generator(
|
|||||||
# and do things like add the vector column etc
|
# and do things like add the vector column etc
|
||||||
if isinstance(batch, pa.RecordBatch):
|
if isinstance(batch, pa.RecordBatch):
|
||||||
batch = pa.Table.from_batches([batch])
|
batch = pa.Table.from_batches([batch])
|
||||||
batch = _sanitize_data(batch, schema, metadata, on_bad_vectors, fill_value)
|
batch, _ = _sanitize_data(batch, schema, metadata, on_bad_vectors, fill_value)
|
||||||
for b in batch.to_batches():
|
for b in batch.to_batches():
|
||||||
yield b
|
yield b
|
||||||
|
|
||||||
@@ -1295,7 +1296,7 @@ class LanceTable(Table):
|
|||||||
The number of vectors in the table.
|
The number of vectors in the table.
|
||||||
"""
|
"""
|
||||||
# TODO: manage table listing and metadata separately
|
# TODO: manage table listing and metadata separately
|
||||||
data = _sanitize_data(
|
data, _ = _sanitize_data(
|
||||||
data,
|
data,
|
||||||
self.schema,
|
self.schema,
|
||||||
metadata=self.schema.metadata,
|
metadata=self.schema.metadata,
|
||||||
@@ -1547,7 +1548,7 @@ class LanceTable(Table):
|
|||||||
metadata = registry.get_table_metadata(embedding_functions)
|
metadata = registry.get_table_metadata(embedding_functions)
|
||||||
|
|
||||||
if data is not None:
|
if data is not None:
|
||||||
data = _sanitize_data(
|
data, schema = _sanitize_data(
|
||||||
data,
|
data,
|
||||||
schema,
|
schema,
|
||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
@@ -1675,7 +1676,7 @@ class LanceTable(Table):
|
|||||||
on_bad_vectors: str,
|
on_bad_vectors: str,
|
||||||
fill_value: float,
|
fill_value: float,
|
||||||
):
|
):
|
||||||
new_data = _sanitize_data(
|
new_data, _ = _sanitize_data(
|
||||||
new_data,
|
new_data,
|
||||||
self.schema,
|
self.schema,
|
||||||
metadata=self.schema.metadata,
|
metadata=self.schema.metadata,
|
||||||
@@ -2153,7 +2154,7 @@ class AsyncTable:
|
|||||||
on_bad_vectors = "error"
|
on_bad_vectors = "error"
|
||||||
if fill_value is None:
|
if fill_value is None:
|
||||||
fill_value = 0.0
|
fill_value = 0.0
|
||||||
data = _sanitize_data(
|
data, _ = _sanitize_data(
|
||||||
data,
|
data,
|
||||||
schema,
|
schema,
|
||||||
metadata=schema.metadata,
|
metadata=schema.metadata,
|
||||||
|
|||||||
@@ -124,3 +124,17 @@ def test_bad_hf_dataset(tmp_path: Path, mock_embedding_function, hf_dataset_with
|
|||||||
# this should still work because we don't add the split column
|
# this should still work because we don't add the split column
|
||||||
# if it already exists
|
# if it already exists
|
||||||
train_table.add(hf_dataset_with_split)
|
train_table.add(hf_dataset_with_split)
|
||||||
|
|
||||||
|
|
||||||
|
def test_generator(tmp_path: Path):
    """A table can be created from a generator-backed Hugging Face dataset."""
    db = lancedb.connect(tmp_path)

    def pokemon_rows():
        # Two rows, yielded lazily, in a fixed order.
        yield from (
            {"pokemon": "bulbasaur", "type": "grass"},
            {"pokemon": "squirtle", "type": "water"},
        )

    dataset = datasets.Dataset.from_generator(pokemon_rows)
    table = db.create_table("pokemon", dataset)

    # Both rows are stored and the schema matches the dataset's Arrow schema.
    assert len(table) == 2
    assert table.schema == dataset.features.arrow_schema
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ from lancedb.conftest import MockTextEmbeddingFunction # noqa
|
|||||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||||
from lancedb.pydantic import LanceModel, Vector
|
from lancedb.pydantic import LanceModel, Vector
|
||||||
from lancedb.rerankers import (
|
from lancedb.rerankers import (
|
||||||
|
LinearCombinationReranker,
|
||||||
|
RRFReranker,
|
||||||
CohereReranker,
|
CohereReranker,
|
||||||
ColbertReranker,
|
ColbertReranker,
|
||||||
CrossEncoderReranker,
|
CrossEncoderReranker,
|
||||||
@@ -140,7 +142,7 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
|
|||||||
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), err
|
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), err
|
||||||
|
|
||||||
|
|
||||||
def test_linear_combination(tmp_path):
|
def _run_test_hybrid_reranker(reranker, tmp_path):
|
||||||
table, schema = get_test_table(tmp_path)
|
table, schema = get_test_table(tmp_path)
|
||||||
# The default reranker
|
# The default reranker
|
||||||
result1 = (
|
result1 = (
|
||||||
@@ -177,6 +179,16 @@ def test_linear_combination(tmp_path):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_linear_combination(tmp_path):
    """Run the shared hybrid-reranker checks with a default LinearCombinationReranker."""
    _run_test_hybrid_reranker(LinearCombinationReranker(), tmp_path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_rrf_reranker(tmp_path):
    """Run the shared hybrid-reranker checks with a default RRFReranker."""
    _run_test_hybrid_reranker(RRFReranker(), tmp_path)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
os.environ.get("COHERE_API_KEY") is None, reason="COHERE_API_KEY not set"
|
os.environ.get("COHERE_API_KEY") is None, reason="COHERE_API_KEY not set"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -9,8 +9,8 @@ use arrow::{
|
|||||||
};
|
};
|
||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use lancedb::arrow::SendableRecordBatchStream;
|
use lancedb::arrow::SendableRecordBatchStream;
|
||||||
use pyo3::{pyclass, pymethods, PyAny, PyObject, PyRef, PyResult, Python};
|
use pyo3::{pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult, Python};
|
||||||
use pyo3_asyncio::tokio::future_into_py;
|
use pyo3_asyncio_0_21::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::error::PythonErrorExt;
|
use crate::error::PythonErrorExt;
|
||||||
|
|
||||||
@@ -36,7 +36,7 @@ impl RecordBatchStream {
|
|||||||
(*self.schema).clone().into_pyarrow(py)
|
(*self.schema).clone().into_pyarrow(py)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn next(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
|
pub fn next(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let inner_next = inner.lock().await.next().await;
|
let inner_next = inner.lock().await.next().await;
|
||||||
|
|||||||
@@ -18,9 +18,9 @@ use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::From
|
|||||||
use lancedb::connection::{Connection as LanceConnection, CreateTableMode};
|
use lancedb::connection::{Connection as LanceConnection, CreateTableMode};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
exceptions::{PyRuntimeError, PyValueError},
|
exceptions::{PyRuntimeError, PyValueError},
|
||||||
pyclass, pyfunction, pymethods, PyAny, PyRef, PyResult, Python,
|
pyclass, pyfunction, pymethods, Bound, PyAny, PyRef, PyResult, Python,
|
||||||
};
|
};
|
||||||
use pyo3_asyncio::tokio::future_into_py;
|
use pyo3_asyncio_0_21::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::{error::PythonErrorExt, table::Table};
|
use crate::{error::PythonErrorExt, table::Table};
|
||||||
|
|
||||||
@@ -73,7 +73,7 @@ impl Connection {
|
|||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
start_after: Option<String>,
|
start_after: Option<String>,
|
||||||
limit: Option<u32>,
|
limit: Option<u32>,
|
||||||
) -> PyResult<&PyAny> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
let mut op = inner.table_names();
|
let mut op = inner.table_names();
|
||||||
if let Some(start_after) = start_after {
|
if let Some(start_after) = start_after {
|
||||||
@@ -89,15 +89,15 @@ impl Connection {
|
|||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
name: String,
|
name: String,
|
||||||
mode: &str,
|
mode: &str,
|
||||||
data: &PyAny,
|
data: Bound<'_, PyAny>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
use_legacy_format: Option<bool>,
|
use_legacy_format: Option<bool>,
|
||||||
) -> PyResult<&'a PyAny> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
|
|
||||||
let mode = Self::parse_create_mode_str(mode)?;
|
let mode = Self::parse_create_mode_str(mode)?;
|
||||||
|
|
||||||
let batches = ArrowArrayStreamReader::from_pyarrow(data)?;
|
let batches = ArrowArrayStreamReader::from_pyarrow_bound(&data)?;
|
||||||
let mut builder = inner.create_table(name, batches).mode(mode);
|
let mut builder = inner.create_table(name, batches).mode(mode);
|
||||||
|
|
||||||
if let Some(storage_options) = storage_options {
|
if let Some(storage_options) = storage_options {
|
||||||
@@ -118,15 +118,15 @@ impl Connection {
|
|||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
name: String,
|
name: String,
|
||||||
mode: &str,
|
mode: &str,
|
||||||
schema: &PyAny,
|
schema: Bound<'_, PyAny>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
use_legacy_format: Option<bool>,
|
use_legacy_format: Option<bool>,
|
||||||
) -> PyResult<&'a PyAny> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
|
|
||||||
let mode = Self::parse_create_mode_str(mode)?;
|
let mode = Self::parse_create_mode_str(mode)?;
|
||||||
|
|
||||||
let schema = Schema::from_pyarrow(schema)?;
|
let schema = Schema::from_pyarrow_bound(&schema)?;
|
||||||
|
|
||||||
let mut builder = inner.create_empty_table(name, Arc::new(schema)).mode(mode);
|
let mut builder = inner.create_empty_table(name, Arc::new(schema)).mode(mode);
|
||||||
|
|
||||||
@@ -150,7 +150,7 @@ impl Connection {
|
|||||||
name: String,
|
name: String,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
index_cache_size: Option<u32>,
|
index_cache_size: Option<u32>,
|
||||||
) -> PyResult<&PyAny> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
let mut builder = inner.open_table(name);
|
let mut builder = inner.open_table(name);
|
||||||
if let Some(storage_options) = storage_options {
|
if let Some(storage_options) = storage_options {
|
||||||
@@ -165,14 +165,14 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn drop_table(self_: PyRef<'_, Self>, name: String) -> PyResult<&PyAny> {
|
pub fn drop_table(self_: PyRef<'_, Self>, name: String) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner.drop_table(name).await.infer_error()
|
inner.drop_table(name).await.infer_error()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn drop_db(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
|
pub fn drop_db(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
future_into_py(
|
future_into_py(
|
||||||
self_.py(),
|
self_.py(),
|
||||||
@@ -190,7 +190,7 @@ pub fn connect(
|
|||||||
host_override: Option<String>,
|
host_override: Option<String>,
|
||||||
read_consistency_interval: Option<f64>,
|
read_consistency_interval: Option<f64>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
) -> PyResult<&PyAny> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
future_into_py(py, async move {
|
future_into_py(py, async move {
|
||||||
let mut builder = lancedb::connect(&uri);
|
let mut builder = lancedb::connect(&uri);
|
||||||
if let Some(api_key) = api_key {
|
if let Some(api_key) = api_key {
|
||||||
|
|||||||
@@ -22,10 +22,11 @@ use lancedb::query::{
|
|||||||
use pyo3::exceptions::PyRuntimeError;
|
use pyo3::exceptions::PyRuntimeError;
|
||||||
use pyo3::pyclass;
|
use pyo3::pyclass;
|
||||||
use pyo3::pymethods;
|
use pyo3::pymethods;
|
||||||
|
use pyo3::Bound;
|
||||||
use pyo3::PyAny;
|
use pyo3::PyAny;
|
||||||
use pyo3::PyRef;
|
use pyo3::PyRef;
|
||||||
use pyo3::PyResult;
|
use pyo3::PyResult;
|
||||||
use pyo3_asyncio::tokio::future_into_py;
|
use pyo3_asyncio_0_21::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::arrow::RecordBatchStream;
|
use crate::arrow::RecordBatchStream;
|
||||||
use crate::error::PythonErrorExt;
|
use crate::error::PythonErrorExt;
|
||||||
@@ -60,14 +61,17 @@ impl Query {
|
|||||||
self.inner = self.inner.clone().limit(limit as usize);
|
self.inner = self.inner.clone().limit(limit as usize);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn nearest_to(&mut self, vector: &PyAny) -> PyResult<VectorQuery> {
|
pub fn nearest_to(&mut self, vector: Bound<'_, PyAny>) -> PyResult<VectorQuery> {
|
||||||
let data: ArrayData = ArrayData::from_pyarrow(vector)?;
|
let data: ArrayData = ArrayData::from_pyarrow_bound(&vector)?;
|
||||||
let array = make_array(data);
|
let array = make_array(data);
|
||||||
let inner = self.inner.clone().nearest_to(array).infer_error()?;
|
let inner = self.inner.clone().nearest_to(array).infer_error()?;
|
||||||
Ok(VectorQuery { inner })
|
Ok(VectorQuery { inner })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self_: PyRef<'_, Self>, max_batch_length: Option<u32>) -> PyResult<&PyAny> {
|
pub fn execute(
|
||||||
|
self_: PyRef<'_, Self>,
|
||||||
|
max_batch_length: Option<u32>,
|
||||||
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let mut opts = QueryExecutionOptions::default();
|
let mut opts = QueryExecutionOptions::default();
|
||||||
@@ -79,7 +83,7 @@ impl Query {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<&PyAny> {
|
fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner
|
inner
|
||||||
@@ -139,7 +143,10 @@ impl VectorQuery {
|
|||||||
self.inner = self.inner.clone().bypass_vector_index()
|
self.inner = self.inner.clone().bypass_vector_index()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self_: PyRef<'_, Self>, max_batch_length: Option<u32>) -> PyResult<&PyAny> {
|
pub fn execute(
|
||||||
|
self_: PyRef<'_, Self>,
|
||||||
|
max_batch_length: Option<u32>,
|
||||||
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let mut opts = QueryExecutionOptions::default();
|
let mut opts = QueryExecutionOptions::default();
|
||||||
@@ -151,7 +158,7 @@ impl VectorQuery {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<&PyAny> {
|
fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner
|
inner
|
||||||
|
|||||||
@@ -9,9 +9,9 @@ use pyo3::{
|
|||||||
exceptions::{PyRuntimeError, PyValueError},
|
exceptions::{PyRuntimeError, PyValueError},
|
||||||
pyclass, pymethods,
|
pyclass, pymethods,
|
||||||
types::{PyDict, PyString},
|
types::{PyDict, PyString},
|
||||||
PyAny, PyRef, PyResult, Python,
|
Bound, PyAny, PyRef, PyResult, Python,
|
||||||
};
|
};
|
||||||
use pyo3_asyncio::tokio::future_into_py;
|
use pyo3_asyncio_0_21::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
error::PythonErrorExt,
|
error::PythonErrorExt,
|
||||||
@@ -91,7 +91,7 @@ impl Table {
|
|||||||
self.inner.take();
|
self.inner.take();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn schema(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
|
pub fn schema(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let schema = inner.schema().await.infer_error()?;
|
let schema = inner.schema().await.infer_error()?;
|
||||||
@@ -99,8 +99,12 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add<'a>(self_: PyRef<'a, Self>, data: &PyAny, mode: String) -> PyResult<&'a PyAny> {
|
pub fn add<'a>(
|
||||||
let batches = ArrowArrayStreamReader::from_pyarrow(data)?;
|
self_: PyRef<'a, Self>,
|
||||||
|
data: Bound<'_, PyAny>,
|
||||||
|
mode: String,
|
||||||
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
|
let batches = ArrowArrayStreamReader::from_pyarrow_bound(&data)?;
|
||||||
let mut op = self_.inner_ref()?.add(batches);
|
let mut op = self_.inner_ref()?.add(batches);
|
||||||
if mode == "append" {
|
if mode == "append" {
|
||||||
op = op.mode(AddDataMode::Append);
|
op = op.mode(AddDataMode::Append);
|
||||||
@@ -116,7 +120,7 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete(self_: PyRef<'_, Self>, condition: String) -> PyResult<&PyAny> {
|
pub fn delete(self_: PyRef<'_, Self>, condition: String) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner.delete(&condition).await.infer_error()
|
inner.delete(&condition).await.infer_error()
|
||||||
@@ -127,7 +131,7 @@ impl Table {
|
|||||||
self_: PyRef<'a, Self>,
|
self_: PyRef<'a, Self>,
|
||||||
updates: &PyDict,
|
updates: &PyDict,
|
||||||
r#where: Option<String>,
|
r#where: Option<String>,
|
||||||
) -> PyResult<&'a PyAny> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let mut op = self_.inner_ref()?.update();
|
let mut op = self_.inner_ref()?.update();
|
||||||
if let Some(only_if) = r#where {
|
if let Some(only_if) = r#where {
|
||||||
op = op.only_if(only_if);
|
op = op.only_if(only_if);
|
||||||
@@ -145,7 +149,10 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn count_rows(self_: PyRef<'_, Self>, filter: Option<String>) -> PyResult<&PyAny> {
|
pub fn count_rows(
|
||||||
|
self_: PyRef<'_, Self>,
|
||||||
|
filter: Option<String>,
|
||||||
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner.count_rows(filter).await.infer_error()
|
inner.count_rows(filter).await.infer_error()
|
||||||
@@ -157,7 +164,7 @@ impl Table {
|
|||||||
column: String,
|
column: String,
|
||||||
index: Option<&Index>,
|
index: Option<&Index>,
|
||||||
replace: Option<bool>,
|
replace: Option<bool>,
|
||||||
) -> PyResult<&'a PyAny> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let index = if let Some(index) = index {
|
let index = if let Some(index) = index {
|
||||||
index.consume()?
|
index.consume()?
|
||||||
} else {
|
} else {
|
||||||
@@ -174,7 +181,7 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
|
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
Ok(inner
|
Ok(inner
|
||||||
@@ -194,7 +201,7 @@ impl Table {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn version(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
|
pub fn version(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(
|
future_into_py(
|
||||||
self_.py(),
|
self_.py(),
|
||||||
@@ -202,21 +209,21 @@ impl Table {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn checkout(self_: PyRef<'_, Self>, version: u64) -> PyResult<&PyAny> {
|
pub fn checkout(self_: PyRef<'_, Self>, version: u64) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner.checkout(version).await.infer_error()
|
inner.checkout(version).await.infer_error()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn checkout_latest(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
|
pub fn checkout_latest(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
inner.checkout_latest().await.infer_error()
|
inner.checkout_latest().await.infer_error()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn restore(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
|
pub fn restore(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(
|
future_into_py(
|
||||||
self_.py(),
|
self_.py(),
|
||||||
@@ -228,7 +235,10 @@ impl Table {
|
|||||||
Query::new(self.inner_ref().unwrap().query())
|
Query::new(self.inner_ref().unwrap().query())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn optimize(self_: PyRef<'_, Self>, cleanup_since_ms: Option<u64>) -> PyResult<&PyAny> {
|
pub fn optimize(
|
||||||
|
self_: PyRef<'_, Self>,
|
||||||
|
cleanup_since_ms: Option<u64>,
|
||||||
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
let older_than = if let Some(ms) = cleanup_since_ms {
|
let older_than = if let Some(ms) = cleanup_since_ms {
|
||||||
if ms > i64::MAX as u64 {
|
if ms > i64::MAX as u64 {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.7.1"
|
version = "0.7.2"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.7.1"
|
version = "0.7.2"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
@@ -57,14 +57,11 @@ tempfile = "3.5.0"
|
|||||||
rand = { version = "0.8.3", features = ["small_rng"] }
|
rand = { version = "0.8.3", features = ["small_rng"] }
|
||||||
uuid = { version = "1.7.0", features = ["v4"] }
|
uuid = { version = "1.7.0", features = ["v4"] }
|
||||||
walkdir = "2"
|
walkdir = "2"
|
||||||
# For s3 integration tests (dev deps aren't allowed to be optional atm)
|
aws-sdk-dynamodb = { version = "1.38.0" }
|
||||||
# We pin these because the content-length check breaks with localstack
|
aws-sdk-s3 = { version = "1.38.0" }
|
||||||
# https://github.com/smithy-lang/smithy-rs/releases/tag/release-2024-05-21
|
aws-sdk-kms = { version = "1.37" }
|
||||||
aws-sdk-dynamodb = { version = "=1.23.0" }
|
|
||||||
aws-sdk-s3 = { version = "=1.23.0" }
|
|
||||||
aws-sdk-kms = { version = "=1.21.0" }
|
|
||||||
aws-config = { version = "1.0" }
|
aws-config = { version = "1.0" }
|
||||||
aws-smithy-runtime = { version = "=1.3.1" }
|
aws-smithy-runtime = { version = "1.3" }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
|
|||||||
@@ -14,26 +14,16 @@
|
|||||||
|
|
||||||
//! A mirroring object store that mirror writes to a secondary object store
|
//! A mirroring object store that mirror writes to a secondary object store
|
||||||
|
|
||||||
use std::{
|
use std::{fmt::Formatter, sync::Arc};
|
||||||
fmt::Formatter,
|
|
||||||
pin::Pin,
|
|
||||||
sync::Arc,
|
|
||||||
task::{Context, Poll},
|
|
||||||
};
|
|
||||||
|
|
||||||
use bytes::Bytes;
|
use futures::{stream::BoxStream, TryFutureExt};
|
||||||
use futures::{stream::BoxStream, FutureExt, StreamExt};
|
|
||||||
use lance::io::WrappingObjectStore;
|
use lance::io::WrappingObjectStore;
|
||||||
use object_store::{
|
use object_store::{
|
||||||
path::Path, Error, GetOptions, GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore,
|
path::Path, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
||||||
PutOptions, PutResult, Result,
|
PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, UploadPart,
|
||||||
};
|
};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use tokio::{
|
|
||||||
io::{AsyncWrite, AsyncWriteExt},
|
|
||||||
task::JoinHandle,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct MirroringObjectStore {
|
struct MirroringObjectStore {
|
||||||
@@ -72,19 +62,10 @@ impl PrimaryOnly for Path {
|
|||||||
/// Note: this object store does not mirror writes to *.manifest files
|
/// Note: this object store does not mirror writes to *.manifest files
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl ObjectStore for MirroringObjectStore {
|
impl ObjectStore for MirroringObjectStore {
|
||||||
async fn put(&self, location: &Path, bytes: Bytes) -> Result<PutResult> {
|
|
||||||
if location.primary_only() {
|
|
||||||
self.primary.put(location, bytes).await
|
|
||||||
} else {
|
|
||||||
self.secondary.put(location, bytes.clone()).await?;
|
|
||||||
self.primary.put(location, bytes).await
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn put_opts(
|
async fn put_opts(
|
||||||
&self,
|
&self,
|
||||||
location: &Path,
|
location: &Path,
|
||||||
bytes: Bytes,
|
bytes: PutPayload,
|
||||||
options: PutOptions,
|
options: PutOptions,
|
||||||
) -> Result<PutResult> {
|
) -> Result<PutResult> {
|
||||||
if location.primary_only() {
|
if location.primary_only() {
|
||||||
@@ -97,32 +78,22 @@ impl ObjectStore for MirroringObjectStore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn put_multipart(
|
async fn put_multipart_opts(
|
||||||
&self,
|
&self,
|
||||||
location: &Path,
|
location: &Path,
|
||||||
) -> Result<(MultipartId, Box<dyn AsyncWrite + Unpin + Send>)> {
|
opts: PutMultipartOpts,
|
||||||
|
) -> Result<Box<dyn MultipartUpload>> {
|
||||||
if location.primary_only() {
|
if location.primary_only() {
|
||||||
return self.primary.put_multipart(location).await;
|
return self.primary.put_multipart_opts(location, opts).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
let (id, stream) = self.secondary.put_multipart(location).await?;
|
let secondary = self
|
||||||
|
.secondary
|
||||||
|
.put_multipart_opts(location, opts.clone())
|
||||||
|
.await?;
|
||||||
|
let primary = self.primary.put_multipart_opts(location, opts).await?;
|
||||||
|
|
||||||
let mirroring_upload = MirroringUpload::new(
|
Ok(Box::new(MirroringUpload { primary, secondary }))
|
||||||
Pin::new(stream),
|
|
||||||
self.primary.clone(),
|
|
||||||
self.secondary.clone(),
|
|
||||||
location.clone(),
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok((id, Box::new(mirroring_upload)))
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn abort_multipart(&self, location: &Path, multipart_id: &MultipartId) -> Result<()> {
|
|
||||||
if location.primary_only() {
|
|
||||||
return self.primary.abort_multipart(location, multipart_id).await;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.secondary.abort_multipart(location, multipart_id).await
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads are routed to primary only
|
// Reads are routed to primary only
|
||||||
@@ -170,144 +141,28 @@ impl ObjectStore for MirroringObjectStore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct MirroringUpload {
|
|
||||||
secondary_stream: Pin<Box<dyn AsyncWrite + Unpin + Send>>,
|
|
||||||
|
|
||||||
primary_store: Arc<dyn ObjectStore>,
|
|
||||||
secondary_store: Arc<dyn ObjectStore>,
|
|
||||||
location: Path,
|
|
||||||
|
|
||||||
state: MirroringUploadShutdown,
|
|
||||||
}
|
|
||||||
|
|
||||||
// The state goes from
|
|
||||||
// None
|
|
||||||
// -> (secondary)ShutingDown
|
|
||||||
// -> (secondary)ShutdownDone
|
|
||||||
// -> Uploading(to primary)
|
|
||||||
// -> Done
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum MirroringUploadShutdown {
|
struct MirroringUpload {
|
||||||
None,
|
primary: Box<dyn MultipartUpload>,
|
||||||
ShutingDown,
|
secondary: Box<dyn MultipartUpload>,
|
||||||
ShutdownDone,
|
|
||||||
Uploading(Pin<Box<JoinHandle<()>>>),
|
|
||||||
Completed,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MirroringUpload {
|
#[async_trait]
|
||||||
pub fn new(
|
impl MultipartUpload for MirroringUpload {
|
||||||
secondary_stream: Pin<Box<dyn AsyncWrite + Unpin + Send>>,
|
fn put_part(&mut self, data: PutPayload) -> UploadPart {
|
||||||
primary_store: Arc<dyn ObjectStore>,
|
let put_primary = self.primary.put_part(data.clone());
|
||||||
secondary_store: Arc<dyn ObjectStore>,
|
let put_secondary = self.secondary.put_part(data);
|
||||||
location: Path,
|
Box::pin(put_secondary.and_then(|_| put_primary))
|
||||||
) -> Self {
|
|
||||||
Self {
|
|
||||||
secondary_stream,
|
|
||||||
primary_store,
|
|
||||||
secondary_store,
|
|
||||||
location,
|
|
||||||
state: MirroringUploadShutdown::None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl AsyncWrite for MirroringUpload {
|
|
||||||
fn poll_write(
|
|
||||||
self: Pin<&mut Self>,
|
|
||||||
cx: &mut Context<'_>,
|
|
||||||
buf: &[u8],
|
|
||||||
) -> Poll<Result<usize, std::io::Error>> {
|
|
||||||
if !matches!(self.state, MirroringUploadShutdown::None) {
|
|
||||||
return Poll::Ready(Err(std::io::Error::new(
|
|
||||||
std::io::ErrorKind::Other,
|
|
||||||
"already shutdown",
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
// Write to secondary first
|
|
||||||
let mut_self = self.get_mut();
|
|
||||||
mut_self.secondary_stream.as_mut().poll_write(cx, buf)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), std::io::Error>> {
|
async fn complete(&mut self) -> Result<PutResult> {
|
||||||
if !matches!(self.state, MirroringUploadShutdown::None) {
|
self.secondary.complete().await?;
|
||||||
return Poll::Ready(Err(std::io::Error::new(
|
self.primary.complete().await
|
||||||
std::io::ErrorKind::Other,
|
|
||||||
"already shutdown",
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut_self = self.get_mut();
|
|
||||||
mut_self.secondary_stream.as_mut().poll_flush(cx)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn poll_shutdown(
|
async fn abort(&mut self) -> Result<()> {
|
||||||
self: Pin<&mut Self>,
|
self.secondary.abort().await?;
|
||||||
cx: &mut Context<'_>,
|
self.primary.abort().await
|
||||||
) -> Poll<Result<(), std::io::Error>> {
|
|
||||||
let mut_self = self.get_mut();
|
|
||||||
|
|
||||||
loop {
|
|
||||||
// try to shutdown secondary first
|
|
||||||
match &mut mut_self.state {
|
|
||||||
MirroringUploadShutdown::None | MirroringUploadShutdown::ShutingDown => {
|
|
||||||
match mut_self.secondary_stream.as_mut().poll_shutdown(cx) {
|
|
||||||
Poll::Ready(Ok(())) => {
|
|
||||||
mut_self.state = MirroringUploadShutdown::ShutdownDone;
|
|
||||||
// don't return, no waker is setup
|
|
||||||
}
|
|
||||||
Poll::Ready(Err(e)) => return Poll::Ready(Err(e)),
|
|
||||||
Poll::Pending => {
|
|
||||||
mut_self.state = MirroringUploadShutdown::ShutingDown;
|
|
||||||
return Poll::Pending;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
MirroringUploadShutdown::ShutdownDone => {
|
|
||||||
let primary_store = mut_self.primary_store.clone();
|
|
||||||
let secondary_store = mut_self.secondary_store.clone();
|
|
||||||
let location = mut_self.location.clone();
|
|
||||||
|
|
||||||
let upload_future =
|
|
||||||
Box::pin(tokio::runtime::Handle::current().spawn(async move {
|
|
||||||
let mut source =
|
|
||||||
secondary_store.get(&location).await.unwrap().into_stream();
|
|
||||||
let upload_stream = primary_store.put_multipart(&location).await;
|
|
||||||
let (_, mut stream) = upload_stream.unwrap();
|
|
||||||
|
|
||||||
while let Some(buf) = source.next().await {
|
|
||||||
let buf = buf.unwrap();
|
|
||||||
stream.write_all(&buf).await.unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
stream.shutdown().await.unwrap();
|
|
||||||
}));
|
|
||||||
mut_self.state = MirroringUploadShutdown::Uploading(upload_future);
|
|
||||||
// don't return, no waker is setup
|
|
||||||
}
|
|
||||||
MirroringUploadShutdown::Uploading(ref mut join_handle) => {
|
|
||||||
match join_handle.poll_unpin(cx) {
|
|
||||||
Poll::Ready(Ok(())) => {
|
|
||||||
mut_self.state = MirroringUploadShutdown::Completed;
|
|
||||||
return Poll::Ready(Ok(()));
|
|
||||||
}
|
|
||||||
Poll::Ready(Err(e)) => {
|
|
||||||
mut_self.state = MirroringUploadShutdown::Completed;
|
|
||||||
return Poll::Ready(Err(e.into()));
|
|
||||||
}
|
|
||||||
Poll::Pending => {
|
|
||||||
return Poll::Pending;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
MirroringUploadShutdown::Completed => {
|
|
||||||
return Poll::Ready(Err(std::io::Error::new(
|
|
||||||
std::io::ErrorKind::Other,
|
|
||||||
"shutdown already completed",
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user