Compare commits


17 Commits

Author SHA1 Message Date
Chang She
fbd0bc7740 bump version for v0.1.5-python 2023-06-02 09:18:26 -07:00
gsilvestrin
f765a453cf Use fsspec to implement table_names with cloud storage support (#117)
Co-authored-by: Will Jones <willjones127@gmail.com>
2023-06-01 16:56:26 -07:00
gsilvestrin
45b3a14f26 Bumping vectordb to v0.1.3 (#124) 2023-06-01 16:36:11 -07:00
Lei Xu
9965b4564d [Python] Support drop table (#123)
Closes #86
2023-06-01 15:58:45 -07:00
gsilvestrin
0719e4b3fb Revert "refactor: pull node binaries into separate packages (#88)" (#122)
This reverts commit e50b642d80.
2023-06-01 13:53:07 -07:00
Jai
091fb9b665 add existence check (#112) 2023-06-01 11:45:26 -07:00
Chang She
03013a4434 Multimodal search demo (#118)
Slow roasted over 12 hours. Pairs well with #111.

---------

Co-authored-by: Chang She <chang@lancedb.com>
2023-06-01 10:34:08 -07:00
gsilvestrin
3e14b357e7 add openai embedding function to nodejs client (#107)
- openai is an optional dependency for lancedb
- added an example to show how to use it
2023-06-01 10:25:00 -07:00
Lei Xu
99cbda8b07 Generate diffusiondb embeddings (#111) 2023-06-01 10:23:29 -07:00
Will Jones
e50b642d80 refactor: pull node binaries into separate packages (#88)
Changes:

* Refactors the Node module to load the shared library from a separate
package. When a user does `npm install vectordb`, the correct optional
dependency is automatically downloaded by npm.
* Brings Rust and Node versions in alignment at 0.1.2.
* Add scripts and instructions to build Linux and MacOS node artifacts
locally.
* Add instructions for publishing the npm module and crates.
2023-06-01 09:17:19 -07:00
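
The first bullet in this commit message refers to npm's optional-dependency mechanism: each platform's prebuilt binary ships as its own package, and the main module loads whichever one npm managed to install. A minimal sketch of that loading pattern (the package naming scheme below is hypothetical, not the actual names vectordb publishes):

    // Prefer the platform-specific binary package; fall back to a locally built artifact.
    let native
    try {
      native = require(`vectordb-${process.platform}-${process.arch}`) // hypothetical name scheme
    } catch {
      native = require('./native.js') // local build fallback
    }
    module.exports = native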
gsilvestrin
6d8cf52e01 Better error granularity for table operations (#113) 2023-06-01 09:04:42 -07:00
Akash
53f3882d6e Fixed documentation link for the Youtube Transcripts Jupyter Notebook (#105)
Changed the link to the Youtube Transcripts Jupyter notebook path in the documentation.

Previously it went inside docs/notebooks (which does not exist). I've
modified it to go inside the notebooks folder instead.
2023-06-01 09:00:40 -07:00
Chang She
2b26775ed1 python v0.1.4 2023-05-31 20:11:25 -07:00
Lei Xu
306ada5cb8 Support S3 and GCS from typescript SDK (#106) 2023-05-30 21:32:17 -07:00
gsilvestrin
d3aa8bfbc5 add embedding functions to the nodejs client (#95) 2023-05-26 18:09:20 -07:00
Chang She
04d97347d7 move tantivy-py installation to be separate from wheel (#97)
PyPI does not allow packages to be uploaded that have a direct reference

for now we'll just ask the user to install tantivy separately

---------

Co-authored-by: Chang She <chang@lancedb.com>
2023-05-25 17:57:26 -06:00
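
The "separate install" here is the pinned-commit pip command that the full-text-search docs change further down this page adds:

    pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985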
Chang She
22aa8a93c2 bump version for v0.1.3 2023-05-25 17:01:52 -06:00
39 changed files with 1596 additions and 207 deletions


@@ -30,7 +30,8 @@ jobs:
       python-version: 3.${{ matrix.python-minor-version }}
     - name: Install lancedb
       run: |
-        pip install -e ".[fts]"
+        pip install -e .
+        pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
         pip install pytest
     - name: Run tests
       run: pytest -x -v --durations=30 tests
@@ -52,7 +53,8 @@ jobs:
       python-version: "3.11"
     - name: Install lancedb
       run: |
-        pip install -e ".[fts]"
+        pip install -e .
+        pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
         pip install pytest
     - name: Run tests
       run: pytest -x -v --durations=30 tests

Cargo.lock (generated, 8 changes)

@@ -1052,6 +1052,7 @@ dependencies = [
  "paste",
  "petgraph",
  "rand",
+ "regex",
  "uuid",
 ]
@@ -1645,9 +1646,9 @@ dependencies = [
 
 [[package]]
 name = "lance"
-version = "0.4.12"
+version = "0.4.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc96cf89139af6f439a0e28ccd04ddf81be795b79fda3105b7a8952fadeb778e"
+checksum = "86dda8185bd1ffae7b910c1f68035af23be9b717c52e9cc4de176cd30b47f772"
 dependencies = [
  "accelerate-src",
  "arrow",
@@ -1684,6 +1685,7 @@ dependencies = [
  "rand",
  "reqwest",
  "shellexpand",
+ "snafu",
  "sqlparser-lance",
  "tokio",
  "url",
@@ -3362,7 +3364,9 @@ dependencies = [
  "arrow-data",
  "arrow-schema",
  "lance",
+ "object_store",
  "rand",
+ "snafu",
  "tempfile",
  "tokio",
 ]


@@ -6,9 +6,10 @@ to make this available for JS as well.
 ## Installation
 
-To use full text search, you must install the fts optional dependencies:
-`pip install lancedb[fts]`
+To use full text search, you must install optional dependency tantivy-py:
+
+    # tantivy 0.19.2
+    pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
 
 ## Quickstart


@@ -38,7 +38,7 @@ result = table.search([100, 100]).limit(2).to_df()
 ## Complete Demos
 
 We will be adding completed demo apps built using LanceDB.
-- [YouTube Transcript Search](../notebooks/youtube_transcript_search.ipynb)
+- [YouTube Transcript Search](../../notebooks/youtube_transcript_search.ipynb)
 
 ## Documentation Quick Links


@@ -0,0 +1,41 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
'use strict'

async function example () {
  const lancedb = require('vectordb')

  // You need to provide an OpenAI API key; here we read it from the OPENAI_API_KEY environment variable
  const apiKey = process.env.OPENAI_API_KEY
  // The embedding function will create embeddings for the 'text' column
  const embedding = new lancedb.OpenAIEmbeddingFunction('text', apiKey)

  const db = await lancedb.connect('data/sample-lancedb')
  const data = [
    { id: 1, text: 'Black T-Shirt', price: 10 },
    { id: 2, text: 'Leather Jacket', price: 50 }
  ]
  const table = await db.createTable('vectors', data, embedding)
  console.log(await db.tableNames())

  const results = await table
    .search('keeps me warm')
    .limit(1)
    .execute()
  console.log(results[0].text)
}

example().then(_ => { console.log('All done!') })
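
Assuming openai and vectordb resolve (the example's package.json follows below), running this from the example directory would look something like:

    npm install
    OPENAI_API_KEY=sk-... node index.js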


@@ -0,0 +1,15 @@
{
  "name": "vectordb-example-js-openai",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "dependencies": {
    "vectordb": "file:../..",
    "openai": "^3.2.1"
  }
}


@@ -9,6 +9,6 @@
   "author": "Lance Devs",
   "license": "Apache-2.0",
   "dependencies": {
-    "vectordb": "^0.1.0"
+    "vectordb": "file:../.."
   }
 }


@@ -17,6 +17,6 @@
     "typescript": "*"
   },
   "dependencies": {
-    "vectordb": "^0.1.0"
+    "vectordb": "file:../.."
   }
 }

node/package-lock.json (generated, 470 changes)

@@ -1,12 +1,12 @@
 {
   "name": "vectordb",
-  "version": "0.1.1",
+  "version": "0.1.3",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "vectordb",
-      "version": "0.1.1",
+      "version": "0.1.3",
       "license": "Apache-2.0",
       "dependencies": {
         "@apache-arrow/ts": "^12.0.0",
@@ -16,6 +16,7 @@
         "@types/chai": "^4.3.4",
         "@types/mocha": "^10.0.1",
         "@types/node": "^18.16.2",
+        "@types/sinon": "^10.0.15",
         "@types/temp": "^0.9.1",
         "@typescript-eslint/eslint-plugin": "^5.59.1",
         "cargo-cp-artifact": "^0.1",
@@ -26,6 +27,8 @@
         "eslint-plugin-n": "^15.7.0",
         "eslint-plugin-promise": "^6.1.1",
         "mocha": "^10.2.0",
+        "openai": "^3.2.1",
+        "sinon": "^15.1.0",
         "temp": "^0.9.4",
         "ts-node": "^10.9.1",
         "ts-node-dev": "^2.0.0",
@@ -232,6 +235,50 @@
         "node": ">= 8"
       }
     },
+    "node_modules/@sinonjs/commons": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-3.0.0.tgz",
+      "integrity": "sha512-jXBtWAF4vmdNmZgD5FoKsVLv3rPgDnLgPbU84LIJ3otV44vJlDRokVng5v8NFJdCf/da9legHcKaRuZs4L7faA==",
+      "dev": true,
+      "dependencies": {
+        "type-detect": "4.0.8"
+      }
+    },
+    "node_modules/@sinonjs/fake-timers": {
+      "version": "10.2.0",
+      "resolved": "https://registry.npmjs.org/@sinonjs/fake-timers/-/fake-timers-10.2.0.tgz",
+      "integrity": "sha512-OPwQlEdg40HAj5KNF8WW6q2KG4Z+cBCZb3m4ninfTZKaBmbIJodviQsDBoYMPHkOyJJMHnOJo5j2+LKDOhOACg==",
+      "dev": true,
+      "dependencies": {
+        "@sinonjs/commons": "^3.0.0"
+      }
+    },
+    "node_modules/@sinonjs/samsam": {
+      "version": "8.0.0",
+      "resolved": "https://registry.npmjs.org/@sinonjs/samsam/-/samsam-8.0.0.tgz",
+      "integrity": "sha512-Bp8KUVlLp8ibJZrnvq2foVhP0IVX2CIprMJPK0vqGqgrDa0OHVKeZyBykqskkrdxV6yKBPmGasO8LVjAKR3Gew==",
+      "dev": true,
+      "dependencies": {
+        "@sinonjs/commons": "^2.0.0",
+        "lodash.get": "^4.4.2",
+        "type-detect": "^4.0.8"
+      }
+    },
+    "node_modules/@sinonjs/samsam/node_modules/@sinonjs/commons": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-2.0.0.tgz",
+      "integrity": "sha512-uLa0j859mMrg2slwQYdO/AkrOfmH+X6LTVmNTS9CqexuE2IvVORIkSpJLqePAbEnKJ77aMmCwr1NUZ57120Xcg==",
+      "dev": true,
+      "dependencies": {
+        "type-detect": "4.0.8"
+      }
+    },
+    "node_modules/@sinonjs/text-encoding": {
+      "version": "0.7.2",
+      "resolved": "https://registry.npmjs.org/@sinonjs/text-encoding/-/text-encoding-0.7.2.tgz",
+      "integrity": "sha512-sXXKG+uL9IrKqViTtao2Ws6dy0znu9sOaP1di/jKGW1M6VssO8vlpXCQcpZ+jisQ1tTFAC5Jo/EOzFbggBagFQ==",
+      "dev": true
+    },
     "node_modules/@tsconfig/node10": {
       "version": "1.0.9",
       "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.9.tgz",
@@ -307,6 +354,21 @@
       "integrity": "sha512-21cFJr9z3g5dW8B0CVI9g2O9beqaThGQ6ZFBqHfwhzLDKUxaqTIy3vnfah/UPkfOiF2pLq+tGz+W8RyCskuslw==",
       "dev": true
     },
+    "node_modules/@types/sinon": {
+      "version": "10.0.15",
+      "resolved": "https://registry.npmjs.org/@types/sinon/-/sinon-10.0.15.tgz",
+      "integrity": "sha512-3lrFNQG0Kr2LDzvjyjB6AMJk4ge+8iYhQfdnSwIwlG88FUOV43kPcQqDZkDa/h3WSZy6i8Fr0BSjfQtB1B3xuQ==",
+      "dev": true,
+      "dependencies": {
+        "@types/sinonjs__fake-timers": "*"
+      }
+    },
+    "node_modules/@types/sinonjs__fake-timers": {
+      "version": "8.1.2",
+      "resolved": "https://registry.npmjs.org/@types/sinonjs__fake-timers/-/sinonjs__fake-timers-8.1.2.tgz",
+      "integrity": "sha512-9GcLXF0/v3t80caGs5p2rRfkB+a8VBGLJZVih6CNFkx8IZ994wiKKLSRs9nuFwk1HevWs/1mnUmkApGrSGsShA==",
+      "dev": true
+    },
     "node_modules/@types/strip-bom": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/@types/strip-bom/-/strip-bom-3.0.0.tgz",
@@ -744,6 +806,12 @@
         "node": "*"
       }
     },
+    "node_modules/asynckit": {
+      "version": "0.4.0",
+      "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
+      "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
+      "dev": true
+    },
     "node_modules/available-typed-arrays": {
       "version": "1.0.5",
       "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.5.tgz",
@@ -756,6 +824,15 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/axios": {
+      "version": "0.26.1",
+      "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
+      "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
+      "dev": true,
+      "dependencies": {
+        "follow-redirects": "^1.14.8"
+      }
+    },
     "node_modules/balanced-match": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
@@ -968,6 +1045,18 @@
       "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
       "dev": true
     },
+    "node_modules/combined-stream": {
+      "version": "1.0.8",
+      "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
+      "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
+      "dev": true,
+      "dependencies": {
+        "delayed-stream": "~1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/command-line-args": {
       "version": "5.2.1",
       "resolved": "https://registry.npmjs.org/command-line-args/-/command-line-args-5.2.1.tgz",
@@ -1179,6 +1268,15 @@
       "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/delayed-stream": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
+      "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
+      "dev": true,
+      "engines": {
+        "node": ">=0.4.0"
+      }
+    },
     "node_modules/diff": {
       "version": "4.0.2",
       "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz",
@@ -1942,6 +2040,26 @@
       "integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==",
       "dev": true
     },
+    "node_modules/follow-redirects": {
+      "version": "1.15.2",
+      "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
+      "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "individual",
+          "url": "https://github.com/sponsors/RubenVerborgh"
+        }
+      ],
+      "engines": {
+        "node": ">=4.0"
+      },
+      "peerDependenciesMeta": {
+        "debug": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/for-each": {
       "version": "0.3.3",
       "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.3.tgz",
@@ -1951,6 +2069,20 @@
         "is-callable": "^1.1.3"
       }
     },
+    "node_modules/form-data": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
+      "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
+      "dev": true,
+      "dependencies": {
+        "asynckit": "^0.4.0",
+        "combined-stream": "^1.0.8",
+        "mime-types": "^2.1.12"
+      },
+      "engines": {
+        "node": ">= 6"
+      }
+    },
     "node_modules/fs.realpath": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
@@ -2584,6 +2716,12 @@
       "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/isarray": {
+      "version": "0.0.1",
+      "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz",
+      "integrity": "sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==",
+      "dev": true
+    },
     "node_modules/isexe": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
@@ -2644,6 +2782,12 @@
         "json5": "lib/cli.js"
       }
     },
+    "node_modules/just-extend": {
+      "version": "4.2.1",
+      "resolved": "https://registry.npmjs.org/just-extend/-/just-extend-4.2.1.tgz",
+      "integrity": "sha512-g3UB796vUFIY90VIv/WX3L2c8CS2MdWUww3CNrYmqza1Fg0DURc2K/O4YrnklBdQarSJ/y8JnJYDGc+1iumQjg==",
+      "dev": true
+    },
     "node_modules/levn": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz",
@@ -2677,6 +2821,12 @@
       "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz",
       "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA=="
     },
+    "node_modules/lodash.get": {
+      "version": "4.4.2",
+      "resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz",
+      "integrity": "sha512-z+Uw/vLuy6gQe8cfaFWD7p0wVv8fJl3mbzXh33RS+0oW2wvUqiRXiQ69gLWSLpgB5/6sU+r6BlQR0MBILadqTQ==",
+      "dev": true
+    },
     "node_modules/lodash.merge": {
       "version": "4.6.2",
       "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",
@@ -2748,6 +2898,27 @@
         "node": ">=8.6"
       }
     },
+    "node_modules/mime-db": {
+      "version": "1.52.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
+      "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
+      "dev": true,
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/mime-types": {
+      "version": "2.1.35",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
+      "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
+      "dev": true,
+      "dependencies": {
+        "mime-db": "1.52.0"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
     "node_modules/minimatch": {
       "version": "3.1.2",
       "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
@@ -2925,6 +3096,28 @@
       "integrity": "sha512-Tj+HTDSJJKaZnfiuw+iaF9skdPpTo2GtEly5JHnWV/hfv2Qj/9RKsGISQtLh2ox3l5EAGw487hnBee0sIJ6v2g==",
       "dev": true
     },
+    "node_modules/nise": {
+      "version": "5.1.4",
+      "resolved": "https://registry.npmjs.org/nise/-/nise-5.1.4.tgz",
+      "integrity": "sha512-8+Ib8rRJ4L0o3kfmyVCL7gzrohyDe0cMFTBa2d364yIrEGMEoetznKJx899YxjybU6bL9SQkYPSBBs1gyYs8Xg==",
+      "dev": true,
+      "dependencies": {
+        "@sinonjs/commons": "^2.0.0",
+        "@sinonjs/fake-timers": "^10.0.2",
+        "@sinonjs/text-encoding": "^0.7.1",
+        "just-extend": "^4.0.2",
+        "path-to-regexp": "^1.7.0"
+      }
+    },
+    "node_modules/nise/node_modules/@sinonjs/commons": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-2.0.0.tgz",
+      "integrity": "sha512-uLa0j859mMrg2slwQYdO/AkrOfmH+X6LTVmNTS9CqexuE2IvVORIkSpJLqePAbEnKJ77aMmCwr1NUZ57120Xcg==",
+      "dev": true,
+      "dependencies": {
+        "type-detect": "4.0.8"
+      }
+    },
     "node_modules/normalize-path": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz",
@@ -2996,6 +3189,16 @@
         "wrappy": "1"
       }
     },
+    "node_modules/openai": {
+      "version": "3.2.1",
+      "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
+      "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
+      "dev": true,
+      "dependencies": {
+        "axios": "^0.26.0",
+        "form-data": "^4.0.0"
+      }
+    },
     "node_modules/optionator": {
       "version": "0.9.1",
       "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz",
@@ -3099,6 +3302,15 @@
       "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
       "dev": true
     },
+    "node_modules/path-to-regexp": {
+      "version": "1.8.0",
+      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-1.8.0.tgz",
+      "integrity": "sha512-n43JRhlUKUAlibEJhPeir1ncUID16QnEjNpwzNdO3Lm4ywrBpBZ5oLD0I6br9evr1Y9JTqwRtAh7JLoOzAQdVA==",
+      "dev": true,
+      "dependencies": {
+        "isarray": "0.0.1"
+      }
+    },
     "node_modules/path-type": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz",
@@ -3406,6 +3618,45 @@
       "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/sinon": {
+      "version": "15.1.0",
+      "resolved": "https://registry.npmjs.org/sinon/-/sinon-15.1.0.tgz",
+      "integrity": "sha512-cS5FgpDdE9/zx7no8bxROHymSlPLZzq0ChbbLk1DrxBfc+eTeBK3y8nIL+nu/0QeYydhhbLIr7ecHJpywjQaoQ==",
+      "dev": true,
+      "dependencies": {
+        "@sinonjs/commons": "^3.0.0",
+        "@sinonjs/fake-timers": "^10.2.0",
+        "@sinonjs/samsam": "^8.0.0",
+        "diff": "^5.1.0",
+        "nise": "^5.1.4",
+        "supports-color": "^7.2.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/sinon"
+      }
+    },
+    "node_modules/sinon/node_modules/diff": {
+      "version": "5.1.0",
+      "resolved": "https://registry.npmjs.org/diff/-/diff-5.1.0.tgz",
+      "integrity": "sha512-D+mk+qE8VC/PAUrlAU34N+VfXev0ghe5ywmpqrawphmVZc1bEfn56uo9qpyGp1p4xpzOHkSW4ztBd6L7Xx4ACw==",
+      "dev": true,
+      "engines": {
+        "node": ">=0.3.1"
+      }
+    },
+    "node_modules/sinon/node_modules/supports-color": {
+      "version": "7.2.0",
+      "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
+      "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==",
+      "dev": true,
+      "dependencies": {
+        "has-flag": "^4.0.0"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/slash": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
@@ -4217,6 +4468,52 @@
         "fastq": "^1.6.0"
       }
     },
+    "@sinonjs/commons": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-3.0.0.tgz",
+      "integrity": "sha512-jXBtWAF4vmdNmZgD5FoKsVLv3rPgDnLgPbU84LIJ3otV44vJlDRokVng5v8NFJdCf/da9legHcKaRuZs4L7faA==",
+      "dev": true,
+      "requires": {
+        "type-detect": "4.0.8"
+      }
+    },
+    "@sinonjs/fake-timers": {
+      "version": "10.2.0",
+      "resolved": "https://registry.npmjs.org/@sinonjs/fake-timers/-/fake-timers-10.2.0.tgz",
+      "integrity": "sha512-OPwQlEdg40HAj5KNF8WW6q2KG4Z+cBCZb3m4ninfTZKaBmbIJodviQsDBoYMPHkOyJJMHnOJo5j2+LKDOhOACg==",
+      "dev": true,
+      "requires": {
+        "@sinonjs/commons": "^3.0.0"
+      }
+    },
+    "@sinonjs/samsam": {
+      "version": "8.0.0",
+      "resolved": "https://registry.npmjs.org/@sinonjs/samsam/-/samsam-8.0.0.tgz",
+      "integrity": "sha512-Bp8KUVlLp8ibJZrnvq2foVhP0IVX2CIprMJPK0vqGqgrDa0OHVKeZyBykqskkrdxV6yKBPmGasO8LVjAKR3Gew==",
+      "dev": true,
+      "requires": {
+        "@sinonjs/commons": "^2.0.0",
+        "lodash.get": "^4.4.2",
+        "type-detect": "^4.0.8"
+      },
+      "dependencies": {
+        "@sinonjs/commons": {
+          "version": "2.0.0",
+          "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-2.0.0.tgz",
+          "integrity": "sha512-uLa0j859mMrg2slwQYdO/AkrOfmH+X6LTVmNTS9CqexuE2IvVORIkSpJLqePAbEnKJ77aMmCwr1NUZ57120Xcg==",
+          "dev": true,
+          "requires": {
+            "type-detect": "4.0.8"
+          }
+        }
+      }
+    },
+    "@sinonjs/text-encoding": {
+      "version": "0.7.2",
+      "resolved": "https://registry.npmjs.org/@sinonjs/text-encoding/-/text-encoding-0.7.2.tgz",
+      "integrity": "sha512-sXXKG+uL9IrKqViTtao2Ws6dy0znu9sOaP1di/jKGW1M6VssO8vlpXCQcpZ+jisQ1tTFAC5Jo/EOzFbggBagFQ==",
+      "dev": true
+    },
     "@tsconfig/node10": {
       "version": "1.0.9",
       "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.9.tgz",
"@tsconfig/node10": { "@tsconfig/node10": {
"version": "1.0.9", "version": "1.0.9",
"resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.9.tgz", "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.9.tgz",
@@ -4292,6 +4589,21 @@
"integrity": "sha512-21cFJr9z3g5dW8B0CVI9g2O9beqaThGQ6ZFBqHfwhzLDKUxaqTIy3vnfah/UPkfOiF2pLq+tGz+W8RyCskuslw==", "integrity": "sha512-21cFJr9z3g5dW8B0CVI9g2O9beqaThGQ6ZFBqHfwhzLDKUxaqTIy3vnfah/UPkfOiF2pLq+tGz+W8RyCskuslw==",
"dev": true "dev": true
}, },
"@types/sinon": {
"version": "10.0.15",
"resolved": "https://registry.npmjs.org/@types/sinon/-/sinon-10.0.15.tgz",
"integrity": "sha512-3lrFNQG0Kr2LDzvjyjB6AMJk4ge+8iYhQfdnSwIwlG88FUOV43kPcQqDZkDa/h3WSZy6i8Fr0BSjfQtB1B3xuQ==",
"dev": true,
"requires": {
"@types/sinonjs__fake-timers": "*"
}
},
"@types/sinonjs__fake-timers": {
"version": "8.1.2",
"resolved": "https://registry.npmjs.org/@types/sinonjs__fake-timers/-/sinonjs__fake-timers-8.1.2.tgz",
"integrity": "sha512-9GcLXF0/v3t80caGs5p2rRfkB+a8VBGLJZVih6CNFkx8IZ994wiKKLSRs9nuFwk1HevWs/1mnUmkApGrSGsShA==",
"dev": true
},
"@types/strip-bom": { "@types/strip-bom": {
"version": "3.0.0", "version": "3.0.0",
"resolved": "https://registry.npmjs.org/@types/strip-bom/-/strip-bom-3.0.0.tgz", "resolved": "https://registry.npmjs.org/@types/strip-bom/-/strip-bom-3.0.0.tgz",
@@ -4579,12 +4891,27 @@
      "integrity": "sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw==",
      "dev": true
    },
+    "asynckit": {
+      "version": "0.4.0",
+      "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
+      "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
+      "dev": true
+    },
     "available-typed-arrays": {
       "version": "1.0.5",
       "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.5.tgz",
       "integrity": "sha512-DMD0KiN46eipeziST1LPP/STfDU0sufISXmjSgvVsoU2tqxctQeASejWcfNtxYKqETM1UxQ8sp2OrSBWpHY6sw==",
       "dev": true
     },
+    "axios": {
+      "version": "0.26.1",
+      "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
+      "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
+      "dev": true,
+      "requires": {
+        "follow-redirects": "^1.14.8"
+      }
+    },
     "balanced-match": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
@@ -4749,6 +5076,15 @@
      "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
      "dev": true
    },
+    "combined-stream": {
+      "version": "1.0.8",
+      "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
+      "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
+      "dev": true,
+      "requires": {
+        "delayed-stream": "~1.0.0"
+      }
+    },
     "command-line-args": {
       "version": "5.2.1",
       "resolved": "https://registry.npmjs.org/command-line-args/-/command-line-args-5.2.1.tgz",
@@ -4908,6 +5244,12 @@
        "object-keys": "^1.1.1"
      }
    },
+    "delayed-stream": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
+      "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
+      "dev": true
+    },
     "diff": {
       "version": "4.0.2",
       "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz",
@@ -5475,6 +5817,12 @@
      "integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==",
      "dev": true
    },
+    "follow-redirects": {
+      "version": "1.15.2",
+      "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
+      "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
+      "dev": true
+    },
     "for-each": {
       "version": "0.3.3",
       "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.3.tgz",
@@ -5484,6 +5832,17 @@
        "is-callable": "^1.1.3"
      }
    },
+    "form-data": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
+      "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
+      "dev": true,
+      "requires": {
+        "asynckit": "^0.4.0",
+        "combined-stream": "^1.0.8",
+        "mime-types": "^2.1.12"
+      }
+    },
     "fs.realpath": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
@@ -5912,6 +6271,12 @@
        "call-bind": "^1.0.2"
      }
    },
+    "isarray": {
+      "version": "0.0.1",
+      "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz",
+      "integrity": "sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ==",
+      "dev": true
+    },
     "isexe": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
@@ -5959,6 +6324,12 @@
        "minimist": "^1.2.0"
      }
    },
+    "just-extend": {
+      "version": "4.2.1",
+      "resolved": "https://registry.npmjs.org/just-extend/-/just-extend-4.2.1.tgz",
+      "integrity": "sha512-g3UB796vUFIY90VIv/WX3L2c8CS2MdWUww3CNrYmqza1Fg0DURc2K/O4YrnklBdQarSJ/y8JnJYDGc+1iumQjg==",
+      "dev": true
+    },
     "levn": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz",
@@ -5983,6 +6354,12 @@
      "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz",
      "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA=="
    },
+    "lodash.get": {
+      "version": "4.4.2",
+      "resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz",
+      "integrity": "sha512-z+Uw/vLuy6gQe8cfaFWD7p0wVv8fJl3mbzXh33RS+0oW2wvUqiRXiQ69gLWSLpgB5/6sU+r6BlQR0MBILadqTQ==",
+      "dev": true
+    },
     "lodash.merge": {
       "version": "4.6.2",
       "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",
@@ -6039,6 +6416,21 @@
        "picomatch": "^2.3.1"
      }
    },
+    "mime-db": {
+      "version": "1.52.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
+      "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
+      "dev": true
+    },
+    "mime-types": {
+      "version": "2.1.35",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
+      "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
+      "dev": true,
+      "requires": {
+        "mime-db": "1.52.0"
+      }
+    },
     "minimatch": {
       "version": "3.1.2",
       "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
@@ -6172,6 +6564,30 @@
      "integrity": "sha512-Tj+HTDSJJKaZnfiuw+iaF9skdPpTo2GtEly5JHnWV/hfv2Qj/9RKsGISQtLh2ox3l5EAGw487hnBee0sIJ6v2g==",
      "dev": true
    },
+    "nise": {
+      "version": "5.1.4",
+      "resolved": "https://registry.npmjs.org/nise/-/nise-5.1.4.tgz",
+      "integrity": "sha512-8+Ib8rRJ4L0o3kfmyVCL7gzrohyDe0cMFTBa2d364yIrEGMEoetznKJx899YxjybU6bL9SQkYPSBBs1gyYs8Xg==",
+      "dev": true,
+      "requires": {
+        "@sinonjs/commons": "^2.0.0",
+        "@sinonjs/fake-timers": "^10.0.2",
+        "@sinonjs/text-encoding": "^0.7.1",
+        "just-extend": "^4.0.2",
+        "path-to-regexp": "^1.7.0"
+      },
+      "dependencies": {
+        "@sinonjs/commons": {
+          "version": "2.0.0",
+          "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-2.0.0.tgz",
+          "integrity": "sha512-uLa0j859mMrg2slwQYdO/AkrOfmH+X6LTVmNTS9CqexuE2IvVORIkSpJLqePAbEnKJ77aMmCwr1NUZ57120Xcg==",
+          "dev": true,
+          "requires": {
+            "type-detect": "4.0.8"
+          }
+        }
+      }
+    },
     "normalize-path": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz",
@@ -6222,6 +6638,16 @@
        "wrappy": "1"
      }
    },
+    "openai": {
+      "version": "3.2.1",
+      "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
+      "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
+      "dev": true,
+      "requires": {
+        "axios": "^0.26.0",
+        "form-data": "^4.0.0"
+      }
+    },
     "optionator": {
       "version": "0.9.1",
       "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz",
@@ -6295,6 +6721,15 @@
      "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
      "dev": true
    },
+    "path-to-regexp": {
+      "version": "1.8.0",
+      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-1.8.0.tgz",
+      "integrity": "sha512-n43JRhlUKUAlibEJhPeir1ncUID16QnEjNpwzNdO3Lm4ywrBpBZ5oLD0I6br9evr1Y9JTqwRtAh7JLoOzAQdVA==",
+      "dev": true,
+      "requires": {
+        "isarray": "0.0.1"
+      }
+    },
     "path-type": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz",
@@ -6484,6 +6919,37 @@
        "object-inspect": "^1.9.0"
      }
    },
+    "sinon": {
+      "version": "15.1.0",
+      "resolved": "https://registry.npmjs.org/sinon/-/sinon-15.1.0.tgz",
+      "integrity": "sha512-cS5FgpDdE9/zx7no8bxROHymSlPLZzq0ChbbLk1DrxBfc+eTeBK3y8nIL+nu/0QeYydhhbLIr7ecHJpywjQaoQ==",
+      "dev": true,
+      "requires": {
+        "@sinonjs/commons": "^3.0.0",
+        "@sinonjs/fake-timers": "^10.2.0",
+        "@sinonjs/samsam": "^8.0.0",
+        "diff": "^5.1.0",
+        "nise": "^5.1.4",
+        "supports-color": "^7.2.0"
+      },
+      "dependencies": {
+        "diff": {
+          "version": "5.1.0",
+          "resolved": "https://registry.npmjs.org/diff/-/diff-5.1.0.tgz",
+          "integrity": "sha512-D+mk+qE8VC/PAUrlAU34N+VfXev0ghe5ywmpqrawphmVZc1bEfn56uo9qpyGp1p4xpzOHkSW4ztBd6L7Xx4ACw==",
+          "dev": true
+        },
+        "supports-color": {
+          "version": "7.2.0",
+          "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
+          "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==",
+          "dev": true,
+          "requires": {
+            "has-flag": "^4.0.0"
+          }
+        }
+      }
+    },
     "slash": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",


@@ -1,6 +1,6 @@
 {
   "name": "vectordb",
-  "version": "0.1.1",
+  "version": "0.1.3",
   "description": " Serverless, low-latency vector database for AI applications",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -27,6 +27,7 @@
     "@types/chai": "^4.3.4",
     "@types/mocha": "^10.0.1",
     "@types/node": "^18.16.2",
+    "@types/sinon": "^10.0.15",
     "@types/temp": "^0.9.1",
     "@typescript-eslint/eslint-plugin": "^5.59.1",
     "cargo-cp-artifact": "^0.1",
@@ -37,6 +38,8 @@
     "eslint-plugin-n": "^15.7.0",
     "eslint-plugin-promise": "^6.1.1",
     "mocha": "^10.2.0",
+    "sinon": "^15.1.0",
+    "openai": "^3.2.1",
     "temp": "^0.9.4",
     "ts-node": "^10.9.1",
     "ts-node-dev": "^2.0.0",


@@ -15,15 +15,16 @@
 import {
   Field,
   Float32,
-  List,
+  List, type ListBuilder,
   makeBuilder,
   RecordBatchFileWriter,
   Table, Utf8,
   type Vector,
   vectorFromArray
 } from 'apache-arrow'
+import { type EmbeddingFunction } from './index'
 
-export function convertToTable (data: Array<Record<string, unknown>>): Table {
+export async function convertToTable<T> (data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Table> {
   if (data.length === 0) {
     throw new Error('At least one record needs to be provided')
   }
@@ -33,11 +34,7 @@ export function convertToTable (data: Array<Record<string, unknown>>): Table {
   for (const columnsKey of columns) {
     if (columnsKey === 'vector') {
-      const children = new Field<Float32>('item', new Float32())
-      const list = new List(children)
-      const listBuilder = makeBuilder({
-        type: list
-      })
+      const listBuilder = newVectorListBuilder()
       const vectorSize = (data[0].vector as any[]).length
       for (const datum of data) {
         if ((datum[columnsKey] as any[]).length !== vectorSize) {
@@ -52,6 +49,14 @@ export function convertToTable (data: Array<Record<string, unknown>>): Table {
       for (const datum of data) {
         values.push(datum[columnsKey])
       }
+
+      if (columnsKey === embeddings?.sourceColumn) {
+        const vectors = await embeddings.embed(values as T[])
+        const listBuilder = newVectorListBuilder()
+        vectors.map(v => listBuilder.append(v))
+        records.vector = listBuilder.finish().toVector()
+      }
+
       if (typeof values[0] === 'string') {
         // `vectorFromArray` converts strings into dictionary vectors, forcing it back to a string column
         records[columnsKey] = vectorFromArray(values, new Utf8())
@@ -64,8 +69,17 @@ export function convertToTable (data: Array<Record<string, unknown>>): Table {
   return new Table(records)
 }
 
+// Creates a new Arrow ListBuilder that stores a Vector column
+function newVectorListBuilder (): ListBuilder<Float32, any> {
+  const children = new Field<Float32>('item', new Float32())
+  const list = new List(children)
+  return makeBuilder({
+    type: list
+  })
+}
+
-export async function fromRecordsToBuffer (data: Array<Record<string, unknown>>): Promise<Buffer> {
-  const table = convertToTable(data)
+export async function fromRecordsToBuffer<T> (data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Buffer> {
+  const table = await convertToTable(data, embeddings)
   const writer = RecordBatchFileWriter.writeAll(table)
   return Buffer.from(await writer.toUint8Array())
 }


@@ -0,0 +1,28 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
 * An embedding function that automatically creates vector representation for a given column.
 */
export interface EmbeddingFunction<T> {
  /**
   * The name of the column that will be used as input for the Embedding Function.
   */
  sourceColumn: string

  /**
   * Creates a vector representation for the given values.
   */
  embed: (data: T[]) => Promise<number[][]>
}
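
Any object with these two members can drive the vectorization, so a custom implementation is only a few lines. A minimal sketch (purely illustrative; the client test further down this page does the same with a small lookup table):

    class ConstantEmbedding implements EmbeddingFunction<string> {
      sourceColumn = 'text'

      // Hypothetical: map every input string to the same fixed 2-d vector
      async embed (data: string[]): Promise<number[][]> {
        return data.map(() => [0.0, 0.0])
      }
    }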


@@ -0,0 +1,51 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { type EmbeddingFunction } from '../index'

export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
  private readonly _openai: any
  private readonly _modelName: string

  constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
    let openai
    try {
      // eslint-disable-next-line @typescript-eslint/no-var-requires
      openai = require('openai')
    } catch {
      throw new Error('please install openai using npm install openai')
    }

    this.sourceColumn = sourceColumn
    const configuration = new openai.Configuration({
      apiKey: openAIKey
    })
    this._openai = new openai.OpenAIApi(configuration)
    this._modelName = modelName
  }

  async embed (data: string[]): Promise<number[][]> {
    const response = await this._openai.createEmbedding({
      model: this._modelName,
      input: data
    })
    const embeddings: number[][] = []
    for (let i = 0; i < response.data.data.length; i++) {
      embeddings.push(response.data.data[i].embedding as number[])
    }
    return embeddings
  }

  sourceColumn: string
}


@@ -19,16 +19,21 @@ import {
   Vector
 } from 'apache-arrow'
 import { fromRecordsToBuffer } from './arrow'
+import type { EmbeddingFunction } from './embedding/embedding_function'
 
 // eslint-disable-next-line @typescript-eslint/no-var-requires
 const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex } = require('../native.js')
 
+export type { EmbeddingFunction }
+export { OpenAIEmbeddingFunction } from './embedding/openai'
+
 /**
  * Connect to a LanceDB instance at the given URI
  * @param uri The uri of the database.
  */
 export async function connect (uri: string): Promise<Connection> {
-  return new Connection(uri)
+  const db = await databaseNew(uri)
+  return new Connection(db, uri)
 }
 
 /**
@@ -38,9 +43,9 @@ export class Connection {
   private readonly _uri: string
   private readonly _db: any
 
-  constructor (uri: string) {
+  constructor (db: any, uri: string) {
     this._uri = uri
-    this._db = databaseNew(uri)
+    this._db = db
   }
 
   get uri (): string {
@@ -56,16 +61,49 @@ export class Connection {
   /**
    * Open a table in the database.
+   *
    * @param name The name of the table.
    */
-  async openTable (name: string): Promise<Table> {
+  async openTable (name: string): Promise<Table>
+  /**
+   * Open a table in the database.
+   *
+   * @param name The name of the table.
+   * @param embeddings An embedding function to use on this Table
+   */
+  async openTable<T> (name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
+  async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
     const tbl = await databaseOpenTable.call(this._db, name)
+    if (embeddings !== undefined) {
+      return new Table(tbl, name, embeddings)
+    } else {
       return new Table(tbl, name)
+    }
   }
 
-  async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> {
-    await tableCreate.call(this._db, name, await fromRecordsToBuffer(data))
-    return await this.openTable(name)
+  /**
+   * Creates a new Table and initialize it with new data.
+   *
+   * @param name The name of the table.
+   * @param data Non-empty Array of Records to be inserted into the Table
+   */
+  async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table>
+  /**
+   * Creates a new Table and initialize it with new data.
+   *
+   * @param name The name of the table.
+   * @param data Non-empty Array of Records to be inserted into the Table
+   * @param embeddings An embedding function to use on this Table
+   */
+  async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
+  async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
+    const tbl = await tableCreate.call(this._db, name, await fromRecordsToBuffer(data, embeddings))
+    if (embeddings !== undefined) {
+      return new Table(tbl, name, embeddings)
+    } else {
+      return new Table(tbl, name)
+    }
   }
 
   async createTableArrow (name: string, table: ArrowTable): Promise<Table> {
@@ -75,16 +113,22 @@
     }
   }
 }
 
-/**
- * A table in a LanceDB database.
- */
-export class Table {
+export class Table<T = number[]> {
   private readonly _tbl: any
   private readonly _name: string
+  private readonly _embeddings?: EmbeddingFunction<T>
 
-  constructor (tbl: any, name: string) {
+  constructor (tbl: any, name: string)
+  /**
+   * @param tbl
+   * @param name
+   * @param embeddings An embedding function to use when interacting with this table
+   */
+  constructor (tbl: any, name: string, embeddings: EmbeddingFunction<T>)
+  constructor (tbl: any, name: string, embeddings?: EmbeddingFunction<T>) {
     this._tbl = tbl
     this._name = name
+    this._embeddings = embeddings
   }
 
   get name (): string {
@@ -92,11 +136,11 @@ export class Table {
   }
 
   /**
-   * Create a search query to find the nearest neighbors of the given query vector.
-   * @param queryVector The query vector.
+   * Creates a search query to find the nearest neighbors of the given search term
+   * @param query The query search term
    */
-  search (queryVector: number[]): Query {
-    return new Query(this._tbl, queryVector)
+  search (query: T): Query<T> {
+    return new Query(this._tbl, query, this._embeddings)
   }
 
   /**
@@ -106,7 +150,7 @@ export class Table {
    * @return The number of rows added to the table
    */
   async add (data: Array<Record<string, unknown>>): Promise<number> {
-    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Append.toString())
+    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Append.toString())
   }
 
   /**
@@ -116,9 +160,14 @@ export class Table {
   * @return The number of rows added to the table
   */
   async overwrite (data: Array<Record<string, unknown>>): Promise<number> {
-    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Overwrite.toString())
+    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Overwrite.toString())
  }
 
+  /**
+   * Create an ANN index on this Table vector index.
+   *
+   * @param indexParams The parameters of this Index, @see VectorIndexParams.
+   */
   async create_index (indexParams: VectorIndexParams): Promise<any> {
     return tableCreateVectorIndex.call(this._tbl, indexParams)
   }
@@ -177,32 +226,35 @@ export type VectorIndexParams = IvfPQIndexConfig
 /**
  * A builder for nearest neighbor queries for LanceDB.
  */
-export class Query {
+export class Query<T = number[]> {
   private readonly _tbl: any
-  private readonly _queryVector: number[]
+  private readonly _query: T
+  private _queryVector?: number[]
   private _limit: number
   private _refineFactor?: number
   private _nprobes: number
   private readonly _columns?: string[]
   private _filter?: string
   private _metricType?: MetricType
+  private readonly _embeddings?: EmbeddingFunction<T>
 
-  constructor (tbl: any, queryVector: number[]) {
+  constructor (tbl: any, query: T, embeddings?: EmbeddingFunction<T>) {
     this._tbl = tbl
-    this._queryVector = queryVector
+    this._query = query
    this._limit = 10
    this._nprobes = 20
    this._refineFactor = undefined
    this._columns = undefined
    this._filter = undefined
    this._metricType = undefined
+    this._embeddings = embeddings
  }
 
  /***
   * Sets the number of results that will be returned
   * @param value number of results
   */
-  limit (value: number): Query {
+  limit (value: number): Query<T> {
    this._limit = value
    return this
  }
@@ -211,7 +263,7 @@ export class Query {
   * Refine the results by reading extra elements and re-ranking them in memory.
   * @param value refine factor to use in this query.
   */
-  refineFactor (value: number): Query {
+  refineFactor (value: number): Query<T> {
    this._refineFactor = value
    return this
  }
@@ -220,7 +272,7 @@ export class Query {
   * The number of probes used. A higher number makes search more accurate but also slower.
   * @param value The number of probes used.
   */
-  nprobes (value: number): Query {
+  nprobes (value: number): Query<T> {
    this._nprobes = value
    return this
  }
@@ -229,7 +281,7 @@ export class Query {
   * A filter statement to be applied to this query.
   * @param value A filter in the same format used by a sql WHERE clause.
   */
-  filter (value: string): Query {
+  filter (value: string): Query<T> {
    this._filter = value
    return this
  }
@@ -238,7 +290,7 @@ export class Query {
   * The MetricType used for this Query.
   * @param value The metric to the. @see MetricType for the different options
   */
-  metricType (value: MetricType): Query {
+  metricType (value: MetricType): Query<T> {
    this._metricType = value
    return this
  }
@@ -247,6 +299,12 @@ export class Query {
   * Execute the query and return the results as an Array of Objects
   */
  async execute<T = Record<string, unknown>> (): Promise<T[]> {
+    if (this._embeddings !== undefined) {
+      this._queryVector = (await this._embeddings.embed([this._query]))[0]
+    } else {
+      this._queryVector = this._query as number[]
+    }
+
    const buffer = await tableSearch.call(this._tbl, this)
    const data = tableFromIPC(buffer)
    return data.toArray().map((entry: Record<string, unknown>) => {


@@ -0,0 +1,50 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { describe } from 'mocha'
import { assert } from 'chai'

import { OpenAIEmbeddingFunction } from '../../embedding/openai'

// eslint-disable-next-line @typescript-eslint/no-var-requires
const { OpenAIApi } = require('openai')
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { stub } = require('sinon')

describe('OpenAPIEmbeddings', function () {
  const stubValue = {
    data: {
      data: [
        {
          embedding: Array(1536).fill(1.0)
        },
        {
          embedding: Array(1536).fill(2.0)
        }
      ]
    }
  }

  describe('#embed', function () {
    it('should create vector embeddings', async function () {
      const openAIStub = stub(OpenAIApi.prototype, 'createEmbedding').returns(stubValue)
      const f = new OpenAIEmbeddingFunction('text', 'sk-key')
      const vectors = await f.embed(['abc', 'def'])

      assert.isTrue(openAIStub.calledOnce)
      assert.equal(vectors.length, 2)
      assert.deepEqual(vectors[0], stubValue.data.data[0].embedding)
      assert.deepEqual(vectors[1], stubValue.data.data[1].embedding)
    })
  })
})

node/src/test/io.ts (new file, 52 lines)

@@ -0,0 +1,52 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// IO tests
import { describe } from 'mocha'
import { assert } from 'chai'
import * as lancedb from '../index'
describe('LanceDB S3 client', function () {
if (process.env.TEST_S3_BASE_URL != null) {
const baseUri = process.env.TEST_S3_BASE_URL
it('should have a valid url', async function () {
const uri = `${baseUri}/valid_url`
const table = await createTestDB(uri, 2, 20)
const con = await lancedb.connect(uri)
assert.equal(con.uri, uri)
const results = await table.search([0.1, 0.3]).limit(5).execute()
assert.equal(results.length, 5)
})
} else {
describe.skip('Skip S3 test', function () {})
}
})
async function createTestDB (uri: string, numDimensions: number = 2, numRows: number = 2): Promise<lancedb.Table> {
const con = await lancedb.connect(uri)
const data = []
for (let i = 0; i < numRows; i++) {
const vector = []
for (let j = 0; j < numDimensions; j++) {
vector.push(i + (j * 0.1))
}
data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector })
}
return await con.createTable('vectors', data)
}
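The suite above only runs when `TEST_S3_BASE_URL` is set. Outside the test harness, the same flow looks like this (bucket and prefix are placeholders; AWS credentials are resolved from the usual environment, e.g. `AWS_PROFILE`):

```typescript
import * as lancedb from 'vectordb'

async function main (): Promise<void> {
  // connect() accepts s3:// (and gcs://) URIs as well as local paths.
  const con = await lancedb.connect('s3://my-bucket/datasets/demo')
  const table = await con.createTable('vectors', [
    { id: 1, vector: [0.1, 0.2] },
    { id: 2, vector: [1.1, 1.2] }
  ])
  const results = await table.search([0.1, 0.2]).limit(1).execute()
  console.log(results)
}

main().catch(console.error)
```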


@@ -17,7 +17,7 @@ import { assert } from 'chai'
 import { track } from 'temp'

 import * as lancedb from '../index'
-import { MetricType, Query } from '../index'
+import { type EmbeddingFunction, MetricType, Query } from '../index'

 describe('LanceDB client', function () {
   describe('when creating a connection to lancedb', function () {
@@ -140,6 +140,39 @@ describe('LanceDB client', function () {
       await table.create_index({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2 })
     }).timeout(10_000) // Timeout is high partially because GH macos runner is pretty slow
   })
+
+  describe('when using a custom embedding function', function () {
+    class TextEmbedding implements EmbeddingFunction<string> {
+      sourceColumn: string
+
+      constructor (sourceColumn: string) {
+        this.sourceColumn = sourceColumn
+      }
+
+      _embedding_map = new Map<string, number[]>([
+        ['foo', [2.1, 2.2]],
+        ['bar', [3.1, 3.2]]
+      ])
+
+      async embed (data: string[]): Promise<number[][]> {
+        return data.map(datum => this._embedding_map.get(datum) ?? [0.0, 0.0])
+      }
+    }
+
+    it('should encode the original data into embeddings', async function () {
+      const dir = await track().mkdir('lancejs')
+      const con = await lancedb.connect(dir)
+      const embeddings = new TextEmbedding('name')
+      const data = [
+        { price: 10, name: 'foo' },
+        { price: 50, name: 'bar' }
+      ]
+      const table = await con.createTable('vectors', data, embeddings)
+      const results = await table.search('foo').execute()
+      assert.equal(results.length, 2)
+    })
+  })
 })

 describe('Query object', function () {
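The `EmbeddingFunction` interface needs only a `sourceColumn` and an async `embed` method, so any model can be wired in at table-creation time. A condensed sketch of the same pattern combined with typed results (the length-based stand-in below is not a real embedding model):

```typescript
import * as lancedb from 'vectordb'
import { type EmbeddingFunction } from 'vectordb'

// Stand-in embedding: maps each string to a fixed-width vector.
class LengthEmbedding implements EmbeddingFunction<string> {
  sourceColumn = 'name'
  async embed (data: string[]): Promise<number[][]> {
    return data.map(s => [s.length, s.length / 2])
  }
}

interface Item { price: number, name: string }

async function main (): Promise<void> {
  const con = await lancedb.connect('/tmp/lancedb-embed')
  const table = await con.createTable('items', [
    { price: 10, name: 'foo' },
    { price: 50, name: 'bar' }
  ], new LengthEmbedding())
  // String queries are embedded with the same function before searching.
  const results = await table.search('foo').limit(1).execute<Item>()
  console.log(results[0]?.name)
}

main().catch(console.error)
```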

notebooks/diffusiondb/datagen.py Executable file

@@ -0,0 +1,108 @@
#!/usr/bin/env python
#
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Dataset hf://poloclub/diffusiondb
"""
import io
from argparse import ArgumentParser
from multiprocessing import Pool
import lance
import lancedb
import pyarrow as pa
from datasets import load_dataset
from PIL import Image
from transformers import CLIPModel, CLIPProcessor, CLIPTokenizerFast
MODEL_ID = "openai/clip-vit-base-patch32"
device = "cuda"
tokenizer = CLIPTokenizerFast.from_pretrained(MODEL_ID)
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
schema = pa.schema(
[
pa.field("prompt", pa.string()),
pa.field("seed", pa.uint32()),
pa.field("step", pa.uint16()),
pa.field("cfg", pa.float32()),
pa.field("sampler", pa.string()),
pa.field("width", pa.uint16()),
pa.field("height", pa.uint16()),
pa.field("timestamp", pa.timestamp("s")),
pa.field("image_nsfw", pa.float32()),
pa.field("prompt_nsfw", pa.float32()),
pa.field("vector", pa.list_(pa.float32(), 512)),
pa.field("image", pa.binary()),
]
)
def pil_to_bytes(img) -> bytes:
buf = io.BytesIO()
img.save(buf, format="PNG")
return buf.getvalue()
def generate_clip_embeddings(batch) -> pa.RecordBatch:
image = processor(text=None, images=batch["image"], return_tensors="pt")[
"pixel_values"
].to(device)
img_emb = model.get_image_features(image)
batch["vector"] = img_emb.cpu().tolist()
with Pool() as p:
batch["image_bytes"] = p.map(pil_to_bytes, batch["image"])
return batch
def datagen(args):
"""Generate DiffusionDB dataset, and use CLIP model to generate image embeddings."""
dataset = load_dataset("poloclub/diffusiondb", args.subset)
data = []
for b in dataset.map(
generate_clip_embeddings, batched=True, batch_size=256, remove_columns=["image"]
)["train"]:
b["image"] = b["image_bytes"]
del b["image_bytes"]
data.append(b)
tbl = pa.Table.from_pylist(data, schema=schema)
return tbl
def main():
parser = ArgumentParser()
parser.add_argument(
"-o", "--output", metavar="DIR", help="Output lance directory", required=True
)
parser.add_argument(
"-s",
"--subset",
choices=["2m_all", "2m_first_10k", "2m_first_100k"],
default="2m_first_10k",
help="subset of the hg dataset",
)
args = parser.parse_args()
batches = datagen(args)
lance.write_dataset(batches, args.output)
if __name__ == "__main__":
main()


@@ -0,0 +1,9 @@
datasets
Pillow
lancedb
isort
black
transformers
--index-url https://download.pytorch.org/whl/cu118
torch
torchvision


@@ -0,0 +1,240 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
]
}
],
"source": [
"!pip install --quiet -U lancedb\n",
"!pip install --quiet gradio transformers torch torchvision"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"import io\n",
"import PIL\n",
"import duckdb\n",
"import lancedb"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## First run setup: Download data and pre-process"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<lance.dataset.LanceDataset at 0x3045db590>"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# remove null prompts\n",
"import lance\n",
"import pyarrow.compute as pc\n",
"\n",
"# download s3://eto-public/datasets/diffusiondb/small_10k.lance to this uri\n",
"data = lance.dataset(\"~/datasets/rawdata.lance\").to_table()\n",
"\n",
"# First data processing and full-text-search index\n",
"db = lancedb.connect(\"~/datasets/demo\")\n",
"tbl = db.create_table(\"diffusiondb\", data.filter(~pc.field(\"prompt\").is_null()))\n",
"tbl = tbl.create_fts_index([\"prompt\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create / Open LanceDB Table"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"db = lancedb.connect(\"~/datasets/demo\")\n",
"tbl = db.open_table(\"diffusiondb\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create CLIP embedding function for the text"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"from transformers import CLIPModel, CLIPProcessor, CLIPTokenizerFast\n",
"\n",
"MODEL_ID = \"openai/clip-vit-base-patch32\"\n",
"\n",
"tokenizer = CLIPTokenizerFast.from_pretrained(MODEL_ID)\n",
"model = CLIPModel.from_pretrained(MODEL_ID)\n",
"processor = CLIPProcessor.from_pretrained(MODEL_ID)\n",
"\n",
"def embed_func(query):\n",
" inputs = tokenizer([query], padding=True, return_tensors=\"pt\")\n",
" text_features = model.get_text_features(**inputs)\n",
" return text_features.detach().numpy()[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Search functions for Gradio"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"def find_image_vectors(query):\n",
" emb = embed_func(query)\n",
" return _extract(tbl.search(emb).limit(9).to_df())\n",
"\n",
"def find_image_keywords(query):\n",
" return _extract(tbl.search(query).limit(9).to_df())\n",
"\n",
"def find_image_sql(query):\n",
" diffusiondb = tbl.to_lance()\n",
" return _extract(duckdb.query(query).to_df())\n",
"\n",
"def _extract(df):\n",
" image_col = \"image\"\n",
" return [(PIL.Image.open(io.BytesIO(row[image_col])), row[\"prompt\"]) for _, row in df.iterrows()]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup Gradio interface"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running on local URL: http://127.0.0.1:7867\n",
"\n",
"To create a public link, set `share=True` in `launch()`.\n"
]
},
{
"data": {
"text/html": [
"<div><iframe src=\"http://127.0.0.1:7867/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": []
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import gradio as gr\n",
"\n",
"\n",
"with gr.Blocks() as demo:\n",
"\n",
" with gr.Row():\n",
" with gr.Tab(\"Embeddings\"):\n",
" vector_query = gr.Textbox(value=\"portraits of a person\", show_label=False)\n",
" b1 = gr.Button(\"Submit\")\n",
" with gr.Tab(\"Keywords\"):\n",
" keyword_query = gr.Textbox(value=\"ninja turtle\", show_label=False)\n",
" b2 = gr.Button(\"Submit\")\n",
" with gr.Tab(\"SQL\"):\n",
" sql_query = gr.Textbox(value=\"SELECT * from diffusiondb WHERE image_nsfw >= 2 LIMIT 9\", show_label=False)\n",
" b3 = gr.Button(\"Submit\")\n",
" with gr.Row():\n",
" gallery = gr.Gallery(\n",
" label=\"Found images\", show_label=False, elem_id=\"gallery\"\n",
" ).style(columns=[3], rows=[3], object_fit=\"contain\", height=\"auto\") \n",
" \n",
" b1.click(find_image_vectors, inputs=vector_query, outputs=gallery)\n",
" b2.click(find_image_keywords, inputs=keyword_query, outputs=gallery)\n",
" b3.click(find_image_sql, inputs=sql_query, outputs=gallery)\n",
" \n",
"demo.launch()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 1
}


@@ -13,13 +13,16 @@
 from __future__ import annotations

+import os
 from pathlib import Path
-import os

 import pyarrow as pa
+from pyarrow import fs

 from .common import DATA, URI
 from .table import LanceTable
-from .util import get_uri_scheme
+from .util import get_uri_scheme, get_uri_location


 class LanceDBConnection:
@@ -47,12 +50,21 @@ class LanceDBConnection:
         -------
         A list of table names.
         """
-        if get_uri_scheme(self.uri) == "file":
-            return [p.stem for p in Path(self.uri).glob("*.lance")]
-        raise NotImplementedError(
-            "List table_names is only supported for local filesystem for now"
-        )
+        try:
+            filesystem, path = fs.FileSystem.from_uri(self.uri)
+        except pa.ArrowInvalid:
+            raise NotImplementedError(
+                "Unsupported scheme: " + self.uri
+            )
+        try:
+            paths = filesystem.get_file_info(fs.FileSelector(get_uri_location(self.uri)))
+        except FileNotFoundError:
+            # It is ok if the file does not exist since it will be created
+            paths = []
+        tables = [
+            os.path.splitext(file_info.base_name)[0]
+            for file_info in paths
+            if file_info.extension == 'lance'
+        ]
+        return tables

     def __len__(self) -> int:
         return len(self.table_names())
@@ -112,3 +124,15 @@ class LanceDBConnection:
         A LanceTable object representing the table.
         """
         return LanceTable(self, name)
+
+    def drop_table(self, name: str):
+        """Drop a table from the database.
+
+        Parameters
+        ----------
+        name: str
+            The name of the table.
+        """
+        filesystem, path = pa.fs.FileSystem.from_uri(self.uri)
+        table_path = os.path.join(path, name + ".lance")
+        filesystem.delete_dir(table_path)


@@ -16,7 +16,13 @@ import os
 from typing import List, Tuple

 import pyarrow as pa
-import tantivy
+
+try:
+    import tantivy
+except ImportError:
+    raise ImportError(
+        "Please install tantivy-py `pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985` to use the full text search feature."
+    )

 from .table import LanceTable


@@ -153,7 +153,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
         import tantivy
     except ImportError:
         raise ImportError(
-            "You need to install the `lancedb[fts]` extra to use this method."
+            "Please install tantivy-py `pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985` to use the full text search feature."
         )

     from .fts import search_index


@@ -253,8 +253,7 @@ def _sanitize_vector_column(data: pa.Table, vector_column_name: str) -> pa.Table
     vector_column_name: str
         The name of the vector column.
     """
-    i = data.column_names.index(vector_column_name)
-    if i < 0:
+    if vector_column_name not in data.column_names:
         raise ValueError(f"Missing vector column: {vector_column_name}")
     vec_arr = data[vector_column_name].combine_chunks()
     if pa.types.is_fixed_size_list(vec_arr.type):
@@ -266,4 +265,4 @@ def _sanitize_vector_column(data: pa.Table, vector_column_name: str) -> pa.Table
         values = values.cast(pa.float32())
     list_size = len(values) / len(data)
     vec_arr = pa.FixedSizeListArray.from_arrays(values, list_size)
-    return data.set_column(i, vector_column_name, vec_arr)
+    return data.set_column(data.column_names.index(vector_column_name), vector_column_name, vec_arr)


@@ -41,3 +41,23 @@ def get_uri_scheme(uri: str) -> str:
         # So we add special handling here for schemes that are a single character
         scheme = "file"
     return scheme
+
+
+def get_uri_location(uri: str) -> str:
+    """
+    Get the location of a URI. If the parameter is not a url, assumes it is just a path.
+
+    Parameters
+    ----------
+    uri : str
+        The URI to parse.
+
+    Returns
+    -------
+    str: Location part of the URL, without scheme
+    """
+    parsed = urlparse(uri)
+    if not parsed.netloc:
+        return parsed.path
+    else:
+        return parsed.netloc + parsed.path


@@ -1,7 +1,7 @@
 [project]
 name = "lancedb"
-version = "0.1.2"
-dependencies = ["pylance>=0.4.6", "ratelimiter", "retry", "tqdm"]
+version = "0.1.5"
+dependencies = ["pylance>=0.4.17", "ratelimiter", "retry", "tqdm"]
 description = "lancedb"
 authors = [
     { name = "LanceDB Devs", email = "dev@lancedb.com" },
@@ -45,10 +45,6 @@ dev = [
 docs = [
     "mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"
 ]
-fts = [
-    # tantivy 0.19.2
-    "tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985"
-]

 [build-system]
 requires = [


@@ -97,3 +97,26 @@ def test_create_mode(tmp_path):
     )
     tbl = db.create_table("test", data=new_data, mode="overwrite")
     assert tbl.to_pandas().item.tolist() == ["fizz", "buzz"]
+
+
+def test_delete_table(tmp_path):
+    db = lancedb.connect(tmp_path)
+    data = pd.DataFrame(
+        {
+            "vector": [[3.1, 4.1], [5.9, 26.5]],
+            "item": ["foo", "bar"],
+            "price": [10.0, 20.0],
+        }
+    )
+    db.create_table("test", data=data)
+    with pytest.raises(Exception):
+        db.create_table("test", data=data)
+
+    assert db.table_names() == ["test"]
+    db.drop_table("test")
+    assert db.table_names() == []
+
+    db.create_table("test", data=data)
+    assert db.table_names() == ["test"]


@@ -14,7 +14,6 @@ import sys
 import numpy as np
 import pyarrow as pa
from lancedb.embeddings import with_embeddings from lancedb.embeddings import with_embeddings


@@ -13,13 +13,13 @@
 import os
 import random

-import lancedb.fts
 import numpy as np
 import pandas as pd
 import pytest
 import tantivy

 import lancedb as ldb
+import lancedb.fts


 @pytest.fixture

python/tests/test_io.py Normal file

@@ -0,0 +1,49 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import pytest
import lancedb
# You need to set up AWS credentials and a base path to run this test. Example:
# AWS_PROFILE=default TEST_S3_BASE_URL=s3://my_bucket/dataset pytest tests/test_io.py
@pytest.mark.skipif(
(os.environ.get("TEST_S3_BASE_URL") is None),
reason="please setup s3 base url",
)
def test_s3_io():
db = lancedb.connect(os.environ.get("TEST_S3_BASE_URL"))
assert db.table_names() == []
table = db.create_table(
"test",
data=[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
],
)
rs = table.search([100, 100]).limit(1).to_df()
assert len(rs) == 1
assert rs["item"].iloc[0] == "bar"
rs = table.search([100, 100]).where("price < 15").limit(2).to_df()
assert len(rs) == 1
assert rs["item"].iloc[0] == "foo"
assert db.table_names() == ["test"]
assert "test" in db
assert len(db) == 1
assert db.open_table("test").name == db["test"].name


@@ -17,7 +17,6 @@ import pandas as pd
 import pandas.testing as tm
 import pyarrow as pa
 import pytest

 from lancedb.query import LanceQueryBuilder


@@ -16,7 +16,6 @@ from pathlib import Path
 import pandas as pd
 import pyarrow as pa
 import pytest

 from lancedb.table import LanceTable


@@ -15,7 +15,7 @@ arrow-ipc = "37.0"
arrow-schema = "37.0" arrow-schema = "37.0"
once_cell = "1" once_cell = "1"
futures = "0.3" futures = "0.3"
lance = "0.4.3" lance = "0.4.17"
vectordb = { path = "../../vectordb" } vectordb = { path = "../../vectordb" }
tokio = { version = "1.23", features = ["rt-multi-thread"] } tokio = { version = "1.23", features = ["rt-multi-thread"] }
neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] } neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] }


@@ -39,7 +39,7 @@ pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsP
     let add_result = table
         .lock()
         .unwrap()
-        .create_idx(&index_params_builder)
+        .create_index(&index_params_builder)
         .await;

     deferred.settle_with(&channel, move |mut cx| {

@@ -56,23 +56,46 @@ fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {
     RUNTIME.get_or_try_init(|| Runtime::new().or_else(|err| cx.throw_error(err.to_string())))
 }
-fn database_new(mut cx: FunctionContext) -> JsResult<JsBox<JsDatabase>> {
+fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
     let path = cx.argument::<JsString>(0)?.value(&mut cx);
+    let rt = runtime(&mut cx)?;
+    let channel = cx.channel();
+    let (deferred, promise) = cx.promise();
+    rt.spawn(async move {
+        let database = Database::connect(&path).await;
+        deferred.settle_with(&channel, move |mut cx| {
             let db = JsDatabase {
-                database: Arc::new(Database::connect(path).or_else(|err| cx.throw_error(err.to_string()))?),
+                database: Arc::new(database.or_else(|err| cx.throw_error(err.to_string()))?),
             };
             Ok(cx.boxed(db))
+        });
+    });
+    Ok(promise)
 }
-fn database_table_names(mut cx: FunctionContext) -> JsResult<JsArray> {
+fn database_table_names(mut cx: FunctionContext) -> JsResult<JsPromise> {
     let db = cx
         .this()
         .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
-    let tables = db
-        .database
-        .table_names()
-        .or_else(|err| cx.throw_error(err.to_string()))?;
-    convert::vec_str_to_array(&tables, &mut cx)
+    let rt = runtime(&mut cx)?;
+    let (deferred, promise) = cx.promise();
+    let channel = cx.channel();
+    let database = db.database.clone();
+    rt.spawn(async move {
+        let tables_rst = database.table_names().await;
+        deferred.settle_with(&channel, move |mut cx| {
+            let tables = tables_rst.or_else(|err| cx.throw_error(err.to_string()))?;
+            let table_names = convert::vec_str_to_array(&tables, &mut cx);
+            table_names
+        });
+    });
+    Ok(promise)
 }

 fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
@@ -87,7 +110,7 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
     let (deferred, promise) = cx.promise();
     rt.spawn(async move {
-        let table_rst = database.open_table(table_name).await;
+        let table_rst = database.open_table(&table_name).await;
         deferred.settle_with(&channel, move |mut cx| {
             let table = Arc::new(Mutex::new(
@@ -186,7 +209,7 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
     rt.block_on(async move {
         let batch_reader: Box<dyn RecordBatchReader> = Box::new(RecordBatchBuffer::new(batches));
-        let table_rst = database.create_table(table_name, batch_reader).await;
+        let table_rst = database.create_table(&table_name, batch_reader).await;
         deferred.settle_with(&channel, move |mut cx| {
             let table = Arc::new(Mutex::new(
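From the TypeScript side, these neon bindings mean that connecting and listing tables are now fully asynchronous, so nothing blocks the Node event loop while the Rust futures run. A small sketch (the `tableNames` method name is assumed from the public TypeScript API that fronts `database_table_names`):

```typescript
import * as lancedb from 'vectordb'

async function main (): Promise<void> {
  // database_new now returns a JsPromise, so connect() must be awaited.
  const con = await lancedb.connect('/tmp/lancedb-demo')
  // database_table_names is likewise promise-based under the hood.
  const names = await con.tableNames()
  console.log(names)
}

main().catch(console.error)
```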


@@ -12,7 +12,9 @@ repository = "https://github.com/lancedb/lancedb"
arrow-array = "37.0" arrow-array = "37.0"
arrow-data = "37.0" arrow-data = "37.0"
arrow-schema = "37.0" arrow-schema = "37.0"
lance = "0.4.3" object_store = "0.5.6"
snafu = "0.7.4"
lance = "0.4.17"
tokio = { version = "1.23", features = ["rt-multi-thread"] } tokio = { version = "1.23", features = ["rt-multi-thread"] }
[dev-dependencies] [dev-dependencies]


@@ -12,16 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use arrow_array::RecordBatchReader;
 use std::fs::create_dir_all;
-use std::path::{Path, PathBuf};
-use std::sync::Arc;
+use std::path::Path;

-use crate::error::Result;
+use arrow_array::RecordBatchReader;
+use lance::io::object_store::ObjectStore;
+use snafu::prelude::*;
+
+use crate::error::{CreateDirSnafu, Result};
 use crate::table::Table;

 pub struct Database {
-    pub(crate) path: Arc<PathBuf>,
+    object_store: ObjectStore,
+    pub(crate) uri: String,
 }

 const LANCE_EXTENSION: &str = "lance";
@@ -37,26 +41,38 @@ impl Database {
     /// # Returns
     ///
     /// * A [Database] object.
-    pub fn connect<P: AsRef<Path>>(path: P) -> Result<Database> {
-        if !path.as_ref().try_exists()? {
-            create_dir_all(&path)?;
+    pub async fn connect(uri: &str) -> Result<Database> {
+        let object_store = ObjectStore::new(uri).await?;
+        if object_store.is_local() {
+            Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
         }
         Ok(Database {
-            path: Arc::new(path.as_ref().to_path_buf()),
+            uri: uri.to_string(),
+            object_store,
         })
     }

+    /// Try to create a local directory to store the lancedb dataset
+    fn try_create_dir(path: &str) -> core::result::Result<(), std::io::Error> {
+        let path = Path::new(path);
+        if !path.try_exists()? {
+            create_dir_all(&path)?;
+        }
+        Ok(())
+    }
+
     /// Get the names of all tables in the database.
     ///
     /// # Returns
     ///
     /// * A [Vec<String>] with all table names.
-    pub fn table_names(&self) -> Result<Vec<String>> {
+    pub async fn table_names(&self) -> Result<Vec<String>> {
         let f = self
-            .path
-            .read_dir()?
-            .flatten()
-            .map(|dir_entry| dir_entry.path())
+            .object_store
+            .read_dir("/")
+            .await?
+            .iter()
+            .map(|fname| Path::new(fname))
             .filter(|path| {
                 let is_lance = path
                     .extension()
@@ -76,10 +92,10 @@ impl Database {
     pub async fn create_table(
         &self,
-        name: String,
+        name: &str,
         batches: Box<dyn RecordBatchReader>,
     ) -> Result<Table> {
-        Table::create(self.path.clone(), name, batches).await
+        Table::create(&self.uri, name, batches).await
     }

     /// Open a table in the database.
@@ -90,8 +106,8 @@ impl Database {
     /// # Returns
     ///
     /// * A [Table] object.
-    pub async fn open_table(&self, name: String) -> Result<Table> {
-        Table::open(self.path.clone(), name).await
+    pub async fn open_table(&self, name: &str) -> Result<Table> {
+        Table::open(&self.uri, name).await
     }
 }
@@ -105,10 +121,10 @@ mod tests {
     #[tokio::test]
     async fn test_connect() {
         let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
-        let db = Database::connect(&path_buf);
-        assert_eq!(db.unwrap().path.as_path(), path_buf.as_path())
+        let uri = tmp_dir.path().to_str().unwrap();
+        let db = Database::connect(uri).await.unwrap();
+        assert_eq!(db.uri, uri);
     }

     #[tokio::test]
@@ -118,10 +134,16 @@ mod tests {
create_dir_all(tmp_dir.path().join("table2.lance")).unwrap(); create_dir_all(tmp_dir.path().join("table2.lance")).unwrap();
create_dir_all(tmp_dir.path().join("invalidlance")).unwrap(); create_dir_all(tmp_dir.path().join("invalidlance")).unwrap();
let db = Database::connect(&tmp_dir.into_path()).unwrap(); let uri = tmp_dir.path().to_str().unwrap();
let tables = db.table_names().unwrap(); let db = Database::connect(uri).await.unwrap();
let tables = db.table_names().await.unwrap();
assert_eq!(tables.len(), 2); assert_eq!(tables.len(), 2);
assert!(tables.contains(&String::from("table1"))); assert!(tables.contains(&String::from("table1")));
assert!(tables.contains(&String::from("table2"))); assert!(tables.contains(&String::from("table2")));
} }
#[tokio::test]
async fn test_connect_s3() {
// let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
}
} }


@@ -12,32 +12,50 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#[derive(Debug)]
-pub enum Error {
-    IO(String),
-    Lance(String),
-}
-
-impl std::fmt::Display for Error {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let (catalog, message) = match self {
-            Self::IO(s) => ("I/O", s.as_str()),
-            Self::Lance(s) => ("Lance", s.as_str()),
-        };
-        write!(f, "LanceDBError({catalog}): {message}")
-    }
-}
+use snafu::Snafu;
+
+#[derive(Debug, Snafu)]
+#[snafu(visibility(pub(crate)))]
+pub enum Error {
+    #[snafu(display("LanceDBError: Invalid table name: {name}"))]
+    InvalidTableName { name: String },
+    #[snafu(display("LanceDBError: Table '{name}' was not found"))]
+    TableNotFound { name: String },
+    #[snafu(display("LanceDBError: Table '{name}' already exists"))]
+    TableAlreadyExists { name: String },
+    #[snafu(display("LanceDBError: Unable to create lance dataset at {path}: {source}"))]
+    CreateDir {
+        path: String,
+        source: std::io::Error,
+    },
+    #[snafu(display("LanceDBError: {message}"))]
+    Store { message: String },
+    #[snafu(display("LanceDBError: {message}"))]
+    Lance { message: String },
+}

 pub type Result<T> = std::result::Result<T, Error>;

-impl From<std::io::Error> for Error {
-    fn from(e: std::io::Error) -> Self {
-        Self::IO(e.to_string())
-    }
-}
-
 impl From<lance::Error> for Error {
     fn from(e: lance::Error) -> Self {
-        Self::Lance(e.to_string())
+        Self::Lance {
+            message: e.to_string(),
+        }
     }
 }
+
+impl From<object_store::Error> for Error {
+    fn from(e: object_store::Error) -> Self {
+        Self::Store {
+            message: e.to_string(),
+        }
+    }
+}
+
+impl From<object_store::path::Error> for Error {
+    fn from(e: object_store::path::Error) -> Self {
+        Self::Store {
+            message: e.to_string(),
+        }
+    }
+}


@@ -12,28 +12,35 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::path::PathBuf;
+use std::path::Path;
 use std::sync::Arc;

 use arrow_array::{Float32Array, RecordBatchReader};
 use lance::dataset::{Dataset, WriteMode, WriteParams};
 use lance::index::IndexType;
+use snafu::prelude::*;

-use crate::error::{Error, Result};
+use crate::error::{Error, InvalidTableNameSnafu, Result};
 use crate::index::vector::VectorIndexBuilder;
 use crate::query::Query;

 pub const VECTOR_COLUMN_NAME: &str = "vector";
 pub const LANCE_FILE_EXTENSION: &str = "lance";

 /// A table in a LanceDB database.
+#[derive(Debug)]
 pub struct Table {
     name: String,
-    path: String,
+    uri: String,
     dataset: Arc<Dataset>,
 }

+impl std::fmt::Display for Table {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Table({})", self.name)
+    }
+}
+
 impl Table {
     /// Opens an existing Table
     ///
@@ -45,18 +52,28 @@ impl Table {
     /// # Returns
     ///
     /// * A [Table] object.
-    pub async fn open(base_path: Arc<PathBuf>, name: String) -> Result<Self> {
-        let ds_path = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
-        let ds_uri = ds_path
+    pub async fn open(base_uri: &str, name: &str) -> Result<Self> {
+        let path = Path::new(base_uri);
+        let table_uri = path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
+        let uri = table_uri
+            .as_path()
             .to_str()
-            .ok_or(Error::IO(format!("Unable to find table {}", name)))?;
-        let dataset = Dataset::open(ds_uri).await?;
-        let table = Table {
-            name,
-            path: ds_uri.to_string(),
-            dataset: Arc::new(dataset),
-        };
-        Ok(table)
+            .context(InvalidTableNameSnafu { name })?;
+        let dataset = Dataset::open(&uri).await.map_err(|e| match e {
+            lance::Error::DatasetNotFound { .. } => Error::TableNotFound {
+                name: name.to_string(),
+            },
+            e => Error::Lance {
+                message: e.to_string(),
+            },
+        })?;
+        Ok(Table {
+            name: name.to_string(),
+            uri: uri.to_string(),
+            dataset: Arc::new(dataset),
+        })
     }
     /// Creates a new Table
@@ -71,25 +88,36 @@ impl Table {
     ///
     /// * A [Table] object.
     pub async fn create(
-        base_path: Arc<PathBuf>,
-        name: String,
+        base_uri: &str,
+        name: &str,
         mut batches: Box<dyn RecordBatchReader>,
     ) -> Result<Self> {
-        let ds_path = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
-        let path = ds_path
+        let base_path = Path::new(base_uri);
+        let table_uri = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
+        let uri = table_uri
+            .as_path()
             .to_str()
-            .ok_or(Error::IO(format!("Unable to find table {}", name)))?;
-        let dataset =
-            Arc::new(Dataset::write(&mut batches, path, Some(WriteParams::default())).await?);
+            .context(InvalidTableNameSnafu { name })?
+            .to_string();
+        let dataset = Dataset::write(&mut batches, &uri, Some(WriteParams::default()))
+            .await
+            .map_err(|e| match e {
+                lance::Error::DatasetAlreadyExists { .. } => Error::TableAlreadyExists {
+                    name: name.to_string(),
+                },
+                e => Error::Lance {
+                    message: e.to_string(),
+                },
+            })?;
         Ok(Table {
-            name,
-            path: path.to_string(),
-            dataset,
+            name: name.to_string(),
+            uri,
+            dataset: Arc::new(dataset),
         })
     }

-    pub async fn create_idx(&mut self, index_builder: &impl VectorIndexBuilder) -> Result<()> {
+    /// Create index on the table.
+    pub async fn create_index(&mut self, index_builder: &impl VectorIndexBuilder) -> Result<()> {
         use lance::index::DatasetIndexExt;
use lance::index::DatasetIndexExt; use lance::index::DatasetIndexExt;
let dataset = self let dataset = self
@@ -125,8 +153,7 @@ impl Table {
         let mut params = WriteParams::default();
         params.mode = write_mode.unwrap_or(WriteMode::Append);
-        self.dataset =
-            Arc::new(Dataset::write(&mut batches, self.path.as_str(), Some(params)).await?);
+        self.dataset = Arc::new(Dataset::write(&mut batches, &self.uri, Some(params)).await?);
         Ok(batches.count())
     }
@@ -151,6 +178,8 @@ impl Table {
 #[cfg(test)]
 mod tests {
-    use std::sync::Arc;
-
     use arrow_array::{
         Array, FixedSizeListArray, Float32Array, Int32Array, RecordBatch, RecordBatchReader,
     };
@@ -161,53 +190,68 @@ mod tests {
     use lance::index::vector::ivf::IvfBuildParams;
     use lance::index::vector::pq::PQBuildParams;
     use rand::Rng;
+    use std::sync::Arc;
     use tempfile::tempdir;

-    use crate::error::Result;
+    use super::*;
     use crate::index::vector::IvfPQIndexBuilder;
-    use crate::table::Table;
-
-    #[tokio::test]
-    async fn test_new_table_not_exists() {
-        let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
-        let table = Table::open(Arc::new(path_buf), "test".to_string()).await;
-        assert!(table.is_err());
-    }

     #[tokio::test]
     async fn test_open() {
         let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
+        let dataset_path = tmp_dir.path().join("test.lance");
+        let uri = tmp_dir.path().to_str().unwrap();

         let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
-        Dataset::write(
-            &mut batches,
-            path_buf.join("test.lance").to_str().unwrap(),
-            None,
-        )
+        Dataset::write(&mut batches, dataset_path.to_str().unwrap(), None)
             .await
             .unwrap();

-        let table = Table::open(Arc::new(path_buf), "test".to_string())
-            .await
-            .unwrap();
+        let table = Table::open(uri, "test").await.unwrap();
         assert_eq!(table.name, "test")
     }

     #[tokio::test]
-    async fn test_add() {
+    async fn test_open_not_found() {
         let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
+        let uri = tmp_dir.path().to_str().unwrap();
+        let table = Table::open(uri, "test").await;
+        assert!(matches!(table.unwrap_err(), Error::TableNotFound { .. }));
+    }
+
+    #[test]
+    fn test_object_store_path() {
+        use std::path::Path as StdPath;
+        let p = StdPath::new("s3://bucket/path/to/file");
+        let c = p.join("subfile");
+        assert_eq!(c.to_str().unwrap(), "s3://bucket/path/to/file/subfile");
+    }
+
+    #[tokio::test]
+    async fn test_create_already_exists() {
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();

         let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
         let schema = batches.schema().clone();
-        let mut table = Table::create(Arc::new(path_buf), "test".to_string(), batches)
-            .await
-            .unwrap();
+        Table::create(&uri, "test", batches).await.unwrap();
+
+        let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
+        let result = Table::create(&uri, "test", batches).await;
+        assert!(matches!(
+            result.unwrap_err(),
+            Error::TableAlreadyExists { .. }
+        ));
+    }
+
+    #[tokio::test]
+    async fn test_add() {
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+
+        let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
+        let schema = batches.schema().clone();
+        let mut table = Table::create(&uri, "test", batches).await.unwrap();
         assert_eq!(table.count_rows().await.unwrap(), 10);

         let new_batches: Box<dyn RecordBatchReader> =
@@ -225,13 +269,11 @@ mod tests {
     #[tokio::test]
     async fn test_add_overwrite() {
         let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
+        let uri = tmp_dir.path().to_str().unwrap();

         let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
         let schema = batches.schema().clone();
-        let mut table = Table::create(Arc::new(path_buf), "test".to_string(), batches)
-            .await
-            .unwrap();
+        let mut table = Table::create(uri, "test", batches).await.unwrap();
         assert_eq!(table.count_rows().await.unwrap(), 10);

         let new_batches: Box<dyn RecordBatchReader> =
@@ -252,20 +294,15 @@ mod tests {
     #[tokio::test]
     async fn test_search() {
         let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
+        let dataset_path = tmp_dir.path().join("test.lance");
+        let uri = tmp_dir.path().to_str().unwrap();

         let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
-        Dataset::write(
-            &mut batches,
-            path_buf.join("test.lance").to_str().unwrap(),
-            None,
-        )
+        Dataset::write(&mut batches, dataset_path.to_str().unwrap(), None)
             .await
             .unwrap();

-        let table = Table::open(Arc::new(path_buf), "test".to_string())
-            .await
-            .unwrap();
+        let table = Table::open(uri, "test").await.unwrap();

         let vector = Float32Array::from_iter_values([0.1, 0.2]);
         let query = table.search(vector.clone());
@@ -291,7 +328,7 @@ mod tests {
         use arrow_array::Float32Array;

         let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
+        let uri = tmp_dir.path().to_str().unwrap();

         let dimension = 16;
         let schema = Arc::new(ArrowSchema::new(vec![Field::new(
@@ -318,9 +355,7 @@ mod tests {
             .unwrap()]);
         let reader: Box<dyn RecordBatchReader + Send> = Box::new(batches);

-        let mut table = Table::create(Arc::new(path_buf), "test".to_string(), reader)
-            .await
-            .unwrap();
+        let mut table = Table::create(uri, "test", reader).await.unwrap();

         let mut i = IvfPQIndexBuilder::new();
@@ -330,7 +365,7 @@ mod tests {
             .ivf_params(IvfBuildParams::new(256))
             .pq_params(PQBuildParams::default());

-        table.create_idx(index_builder).await.unwrap();
+        table.create_index(index_builder).await.unwrap();

         assert_eq!(table.dataset.load_indices().await.unwrap().len(), 1);
         assert_eq!(table.count_rows().await.unwrap(), 512);