mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 13:29:57 +00:00
Compare commits
33 Commits
v0.1.13
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f4ce86e12c | ||
|
|
0664eaec82 | ||
|
|
63acdc2069 | ||
|
|
a636bb1075 | ||
|
|
5e3167da83 | ||
|
|
f09db4a6d6 | ||
|
|
1d343edbd4 | ||
|
|
980f910f50 | ||
|
|
fb97b03a51 | ||
|
|
141b6647a8 | ||
|
|
b45ac4608f | ||
|
|
a86bc05131 | ||
|
|
3537afb2c3 | ||
|
|
23f5dddc7c | ||
|
|
9748406cba | ||
|
|
6271949d38 | ||
|
|
131ad09ab3 | ||
|
|
030f07e7f0 | ||
|
|
72afa06b7a | ||
|
|
088e745e1d | ||
|
|
7a57cddb2c | ||
|
|
8ff5f88916 | ||
|
|
028a6e433d | ||
|
|
04c6814fb1 | ||
|
|
c62e4ca1eb | ||
|
|
aecc5fc42b | ||
|
|
2fdcb307eb | ||
|
|
ad18826579 | ||
|
|
a8a50591d7 | ||
|
|
6dfe7fabc2 | ||
|
|
2b108e1c80 | ||
|
|
8c9edafccc | ||
|
|
0590413b96 |
@@ -1,5 +1,5 @@
|
|||||||
[bumpversion]
|
[bumpversion]
|
||||||
current_version = 0.1.13
|
current_version = 0.1.15
|
||||||
commit = True
|
commit = True
|
||||||
message = Bump version: {current_version} → {new_version}
|
message = Bump version: {current_version} → {new_version}
|
||||||
tag = True
|
tag = True
|
||||||
|
|||||||
2
.github/workflows/docs_test.yml
vendored
2
.github/workflows/docs_test.yml
vendored
@@ -81,7 +81,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
cd docs/test/node_modules/vectordb
|
cd docs/test/node_modules/vectordb
|
||||||
npm ci
|
npm ci
|
||||||
npm run build
|
npm run build-release
|
||||||
npm run tsc
|
npm run tsc
|
||||||
- name: Create test files
|
- name: Create test files
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
7
.github/workflows/make-release-commit.yml
vendored
7
.github/workflows/make-release-commit.yml
vendored
@@ -45,6 +45,13 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
pip install bump2version
|
pip install bump2version
|
||||||
bumpversion --verbose ${{ inputs.part }}
|
bumpversion --verbose ${{ inputs.part }}
|
||||||
|
- name: Update package-lock.json file
|
||||||
|
run: |
|
||||||
|
npm install
|
||||||
|
git add package-lock.json
|
||||||
|
# Add this change to the commit created by bumpversion
|
||||||
|
git commit --amend --no-edit
|
||||||
|
working-directory: node
|
||||||
- name: Push new version and tag
|
- name: Push new version and tag
|
||||||
if: ${{ inputs.dry_run }} == "false"
|
if: ${{ inputs.dry_run }} == "false"
|
||||||
uses: ad-m/github-push-action@master
|
uses: ad-m/github-push-action@master
|
||||||
|
|||||||
37
.github/workflows/npm-publish.yml
vendored
37
.github/workflows/npm-publish.yml
vendored
@@ -116,6 +116,39 @@ jobs:
|
|||||||
path: |
|
path: |
|
||||||
node/dist/vectordb-linux*.tgz
|
node/dist/vectordb-linux*.tgz
|
||||||
|
|
||||||
|
node-windows:
|
||||||
|
runs-on: windows-2022
|
||||||
|
# Only runs on tags that match the make-release action
|
||||||
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
target: [x86_64-pc-windows-msvc]
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
- name: Install Protoc v21.12
|
||||||
|
working-directory: C:\
|
||||||
|
run: |
|
||||||
|
New-Item -Path 'C:\protoc' -ItemType Directory
|
||||||
|
Set-Location C:\protoc
|
||||||
|
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
||||||
|
7z x protoc.zip
|
||||||
|
Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
||||||
|
shell: powershell
|
||||||
|
- name: Install npm dependencies
|
||||||
|
run: |
|
||||||
|
cd node
|
||||||
|
npm ci
|
||||||
|
- name: Build Windows native node modules
|
||||||
|
run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
|
||||||
|
- name: Upload Windows Artifacts
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
|
with:
|
||||||
|
name: windows-native
|
||||||
|
path: |
|
||||||
|
node/dist/vectordb-win32*.tgz
|
||||||
|
|
||||||
release:
|
release:
|
||||||
needs: [node, node-macos, node-linux]
|
needs: [node, node-macos, node-linux]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -128,10 +161,12 @@ jobs:
|
|||||||
- uses: actions/setup-node@v3
|
- uses: actions/setup-node@v3
|
||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
|
registry-url: 'https://registry.npmjs.org'
|
||||||
- name: Publish to NPM
|
- name: Publish to NPM
|
||||||
env:
|
env:
|
||||||
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
|
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
for filename in */*.tgz; do
|
mv */*.tgz .
|
||||||
|
for filename in *.tgz; do
|
||||||
npm publish $filename
|
npm publish $filename
|
||||||
done
|
done
|
||||||
|
|||||||
21
.github/workflows/rust.yml
vendored
21
.github/workflows/rust.yml
vendored
@@ -66,3 +66,24 @@ jobs:
|
|||||||
run: cargo build --all-features
|
run: cargo build --all-features
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: cargo test --all-features
|
run: cargo test --all-features
|
||||||
|
windows:
|
||||||
|
runs-on: windows-2022
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- uses: Swatinem/rust-cache@v2
|
||||||
|
with:
|
||||||
|
workspaces: rust
|
||||||
|
- name: Install Protoc v21.12
|
||||||
|
working-directory: C:\
|
||||||
|
run: |
|
||||||
|
New-Item -Path 'C:\protoc' -ItemType Directory
|
||||||
|
Set-Location C:\protoc
|
||||||
|
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
|
||||||
|
7z x protoc.zip
|
||||||
|
Add-Content $env:GITHUB_PATH "C:\protoc\bin"
|
||||||
|
shell: powershell
|
||||||
|
- name: Run tests
|
||||||
|
run: |
|
||||||
|
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
|
||||||
|
cargo build
|
||||||
|
cargo test
|
||||||
|
|||||||
@@ -6,9 +6,11 @@ members = [
|
|||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = "=0.5.5"
|
lance = "=0.5.8"
|
||||||
arrow-array = "42.0"
|
arrow-array = "42.0"
|
||||||
arrow-data = "42.0"
|
arrow-data = "42.0"
|
||||||
arrow-schema = "42.0"
|
arrow-schema = "42.0"
|
||||||
arrow-ipc = "42.0"
|
arrow-ipc = "42.0"
|
||||||
|
half = { "version" = "=2.2.1", default-features = false }
|
||||||
object_store = "0.6.1"
|
object_store = "0.6.1"
|
||||||
|
|
||||||
|
|||||||
41
ci/build_windows_artifacts.ps1
Normal file
41
ci/build_windows_artifacts.ps1
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# Builds the Windows artifacts (node binaries).
|
||||||
|
# Usage: .\ci\build_windows_artifacts.ps1 [target]
|
||||||
|
# Targets supported:
|
||||||
|
# - x86_64-pc-windows-msvc
|
||||||
|
# - i686-pc-windows-msvc
|
||||||
|
|
||||||
|
function Prebuild-Rust {
|
||||||
|
param (
|
||||||
|
[string]$target
|
||||||
|
)
|
||||||
|
|
||||||
|
# Building here for the sake of easier debugging.
|
||||||
|
Push-Location -Path "rust/ffi/node"
|
||||||
|
Write-Host "Building rust library for $target"
|
||||||
|
$env:RUST_BACKTRACE=1
|
||||||
|
cargo build --release --target $target
|
||||||
|
Pop-Location
|
||||||
|
}
|
||||||
|
|
||||||
|
function Build-NodeBinaries {
|
||||||
|
param (
|
||||||
|
[string]$target
|
||||||
|
)
|
||||||
|
|
||||||
|
Push-Location -Path "node"
|
||||||
|
Write-Host "Building node library for $target"
|
||||||
|
npm run build-release -- --target $target
|
||||||
|
npm run pack-build -- --target $target
|
||||||
|
Pop-Location
|
||||||
|
}
|
||||||
|
|
||||||
|
$targets = $args[0]
|
||||||
|
if (-not $targets) {
|
||||||
|
$targets = "x86_64-pc-windows-msvc"
|
||||||
|
}
|
||||||
|
|
||||||
|
Write-Host "Building artifacts for targets: $targets"
|
||||||
|
foreach ($target in $targets) {
|
||||||
|
Prebuild-Rust $target
|
||||||
|
Build-NodeBinaries $target
|
||||||
|
}
|
||||||
@@ -60,6 +60,9 @@ nav:
|
|||||||
- Python integrations:
|
- Python integrations:
|
||||||
- Pandas and PyArrow: python/arrow.md
|
- Pandas and PyArrow: python/arrow.md
|
||||||
- DuckDB: python/duckdb.md
|
- DuckDB: python/duckdb.md
|
||||||
|
- LangChain 🦜️🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
|
||||||
|
- LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
|
||||||
|
- Pydantic: python/pydantic.md
|
||||||
- Python examples:
|
- Python examples:
|
||||||
- YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
|
- YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
|
||||||
- Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
|
- Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
|
||||||
@@ -68,6 +71,7 @@ nav:
|
|||||||
- Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
|
- Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
|
||||||
- Javascript examples:
|
- Javascript examples:
|
||||||
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
|
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
|
||||||
|
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
|
||||||
- References:
|
- References:
|
||||||
- Vector Search: search.md
|
- Vector Search: search.md
|
||||||
- SQL filters: sql.md
|
- SQL filters: sql.md
|
||||||
|
|||||||
@@ -122,6 +122,35 @@ After a table has been created, you can always add more data to it using
|
|||||||
{vector: [9.5, 56.2], item: "buzz", price: 200.0}])
|
{vector: [9.5, 56.2], item: "buzz", price: 200.0}])
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## How to delete rows from a table
|
||||||
|
|
||||||
|
Use the `delete()` method on tables to delete rows from a table. To choose
|
||||||
|
which rows to delete, provide a filter that matches on the metadata columns.
|
||||||
|
This can delete any number of rows that match the filter.
|
||||||
|
|
||||||
|
=== "Python"
|
||||||
|
```python
|
||||||
|
tbl.delete('item = "fizz"')
|
||||||
|
```
|
||||||
|
|
||||||
|
=== "Javascript"
|
||||||
|
```javascript
|
||||||
|
await tbl.delete('item = "fizz"')
|
||||||
|
```
|
||||||
|
|
||||||
|
The deletion predicate is a SQL expression that supports the same expressions
|
||||||
|
as the `where()` clause on a search. They can be as simple or complex as needed.
|
||||||
|
To see what expressions are supported, see the [SQL filters](sql.md) section.
|
||||||
|
|
||||||
|
|
||||||
|
=== "Python"
|
||||||
|
|
||||||
|
Read more: [lancedb.table.Table.delete][]
|
||||||
|
|
||||||
|
=== "Javascript"
|
||||||
|
|
||||||
|
Read more: [vectordb.Table.delete](javascript/interfaces/Table.md#delete)
|
||||||
|
|
||||||
## How to search for (approximate) nearest neighbors
|
## How to search for (approximate) nearest neighbors
|
||||||
|
|
||||||
Once you've embedded the query, you can find its nearest neighbors using the following code:
|
Once you've embedded the query, you can find its nearest neighbors using the following code:
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ You can also use an external API like OpenAI to generate embeddings
|
|||||||
|
|
||||||
def embed_func(c):
|
def embed_func(c):
|
||||||
rs = openai.Embedding.create(input=c, engine="text-embedding-ada-002")
|
rs = openai.Embedding.create(input=c, engine="text-embedding-ada-002")
|
||||||
return [record["embedding"] for record in rs["data"]]
|
return [record["embedding"] for record in rs["data"]]
|
||||||
```
|
```
|
||||||
|
|
||||||
=== "Javascript"
|
=== "Javascript"
|
||||||
@@ -126,7 +126,7 @@ belong in the same latent space and your results will be nonsensical.
|
|||||||
=== "Javascript"
|
=== "Javascript"
|
||||||
```javascript
|
```javascript
|
||||||
const results = await table
|
const results = await table
|
||||||
.search('What's the best pizza topping?')
|
.search("What's the best pizza topping?")
|
||||||
.limit(10)
|
.limit(10)
|
||||||
.execute()
|
.execute()
|
||||||
```
|
```
|
||||||
|
|||||||
121
docs/src/examples/transformerjs_embedding_search_nodejs.md
Normal file
121
docs/src/examples/transformerjs_embedding_search_nodejs.md
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
# Vector embedding search using TransformersJS
|
||||||
|
|
||||||
|
## Embed and query data from LanceDB using TransformersJS
|
||||||
|
|
||||||
|
<img id="splash" width="400" alt="transformersjs" src="https://github.com/lancedb/lancedb/assets/43097991/88a31e30-3d6f-4eef-9216-4b7c688f1b4f">
|
||||||
|
|
||||||
|
This example shows how to use the [transformers.js](https://github.com/xenova/transformers.js) library to perform vector embedding search using LanceDB's Javascript API.
|
||||||
|
|
||||||
|
|
||||||
|
### Setting up
|
||||||
|
First, install the dependencies:
|
||||||
|
```bash
|
||||||
|
npm install vectordb
|
||||||
|
npm i @xenova/transformers
|
||||||
|
```
|
||||||
|
|
||||||
|
We will also be using the [all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) model, chosen because this version is compatible with Transformers.js.
|
||||||
|
|
||||||
|
Within our `index.js` file we will import the necessary libraries and define our model and database:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
const lancedb = require('vectordb')
|
||||||
|
const { pipeline } = await import('@xenova/transformers')
|
||||||
|
const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
||||||
|
```
|
||||||
|
|
||||||
|
### Creating the embedding function
|
||||||
|
|
||||||
|
Next, we will create a function that will take in a string and return the vector embedding of that string. We will use the `pipe` function we defined earlier to get the vector embedding of the string.
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Define the function. `sourceColumn` is required for LanceDB to know
|
||||||
|
// which column to use as input.
|
||||||
|
const embed_fun = {}
|
||||||
|
embed_fun.sourceColumn = 'text'
|
||||||
|
embed_fun.embed = async function (batch) {
|
||||||
|
let result = []
|
||||||
|
// Given a batch of strings, we will use the `pipe` function to get
|
||||||
|
// the vector embedding of each string.
|
||||||
|
for (let text of batch) {
|
||||||
|
// 'mean' pooling and normalizing allows the embeddings to share the
|
||||||
|
// same length.
|
||||||
|
const res = await pipe(text, { pooling: 'mean', normalize: true })
|
||||||
|
result.push(Array.from(res['data']))
|
||||||
|
}
|
||||||
|
return (result)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Creating the database
|
||||||
|
|
||||||
|
Now, we will create the LanceDB database and add the embedding function we defined earlier.
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Link a folder and create a table with data
|
||||||
|
const db = await lancedb.connect('data/sample-lancedb')
|
||||||
|
|
||||||
|
// You can also import any other data, but make sure that you have a column
|
||||||
|
// for the embedding function to use.
|
||||||
|
const data = [
|
||||||
|
{ id: 1, text: 'Cherry', type: 'fruit' },
|
||||||
|
{ id: 2, text: 'Carrot', type: 'vegetable' },
|
||||||
|
{ id: 3, text: 'Potato', type: 'vegetable' },
|
||||||
|
{ id: 4, text: 'Apple', type: 'fruit' },
|
||||||
|
{ id: 5, text: 'Banana', type: 'fruit' }
|
||||||
|
]
|
||||||
|
|
||||||
|
// Create the table with the embedding function
|
||||||
|
const table = await db.createTable('food_table', data, "create", embed_fun)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Performing the search
|
||||||
|
|
||||||
|
Now, we can perform the search using the `search` function. LanceDB automatically uses the embedding function we defined earlier to get the vector embedding of the query string.
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Query the table
|
||||||
|
const results = await table
|
||||||
|
.search("a sweet fruit to eat")
|
||||||
|
.metricType("cosine")
|
||||||
|
.limit(2)
|
||||||
|
.execute()
|
||||||
|
console.log(results.map(r => r.text))
|
||||||
|
```
|
||||||
|
```bash
|
||||||
|
[ 'Banana', 'Cherry' ]
|
||||||
|
```
|
||||||
|
|
||||||
|
Output of `results`:
|
||||||
|
```bash
|
||||||
|
[
|
||||||
|
{
|
||||||
|
vector: Float32Array(384) [
|
||||||
|
-0.057455405592918396,
|
||||||
|
0.03617725893855095,
|
||||||
|
-0.0367760956287384,
|
||||||
|
... 381 more items
|
||||||
|
],
|
||||||
|
id: 5,
|
||||||
|
text: 'Banana',
|
||||||
|
type: 'fruit',
|
||||||
|
score: 0.4919965863227844
|
||||||
|
},
|
||||||
|
{
|
||||||
|
vector: Float32Array(384) [
|
||||||
|
0.0009714411571621895,
|
||||||
|
0.008223623037338257,
|
||||||
|
0.009571489877998829,
|
||||||
|
... 381 more items
|
||||||
|
],
|
||||||
|
id: 1,
|
||||||
|
text: 'Cherry',
|
||||||
|
type: 'fruit',
|
||||||
|
score: 0.5540297031402588
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Wrapping it up
|
||||||
|
|
||||||
|
In this example, we showed how to use the `transformers.js` library to perform vector embedding search using LanceDB's Javascript API. You can find the full code for this example on [GitHub](https://github.com/lancedb/lancedb/blob/main/node/examples/js-transformers/index.js)!
|
||||||
@@ -10,6 +10,10 @@ A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb)
|
|||||||
npm install vectordb
|
npm install vectordb
|
||||||
```
|
```
|
||||||
|
|
||||||
|
This will download the appropriate native library for your platform. We currently
|
||||||
|
support x86_64 Linux, aarch64 Linux, Intel MacOS, and ARM (M1/M2) MacOS. We do not
|
||||||
|
yet support Windows or musl-based Linux (such as Alpine Linux).
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
### Basic Example
|
### Basic Example
|
||||||
@@ -28,12 +32,34 @@ The [examples](./examples) folder contains complete examples.
|
|||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
Run the tests with
|
To build everything fresh:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install
|
||||||
|
npm run tsc
|
||||||
|
npm run build
|
||||||
|
```
|
||||||
|
|
||||||
|
Then you should be able to run the tests with:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
npm test
|
npm test
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Rebuilding Rust library
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run build
|
||||||
|
```
|
||||||
|
|
||||||
|
### Rebuilding Typescript
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run tsc
|
||||||
|
```
|
||||||
|
|
||||||
|
### Fix lints
|
||||||
|
|
||||||
To run the linter and have it automatically fix all errors
|
To run the linter and have it automatically fix all errors
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ A connection to a LanceDB database.
|
|||||||
### Properties
|
### Properties
|
||||||
|
|
||||||
- [\_db](LocalConnection.md#_db)
|
- [\_db](LocalConnection.md#_db)
|
||||||
- [\_uri](LocalConnection.md#_uri)
|
- [\_options](LocalConnection.md#_options)
|
||||||
|
|
||||||
### Accessors
|
### Accessors
|
||||||
|
|
||||||
@@ -35,18 +35,18 @@ A connection to a LanceDB database.
|
|||||||
|
|
||||||
### constructor
|
### constructor
|
||||||
|
|
||||||
• **new LocalConnection**(`db`, `uri`)
|
• **new LocalConnection**(`db`, `options`)
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
| Name | Type |
|
| Name | Type |
|
||||||
| :------ | :------ |
|
| :------ | :------ |
|
||||||
| `db` | `any` |
|
| `db` | `any` |
|
||||||
| `uri` | `string` |
|
| `options` | [`ConnectionOptions`](../interfaces/ConnectionOptions.md) |
|
||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:132](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L132)
|
[index.ts:184](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L184)
|
||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
@@ -56,17 +56,17 @@ A connection to a LanceDB database.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:130](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L130)
|
[index.ts:182](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L182)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
### \_uri
|
### \_options
|
||||||
|
|
||||||
• `Private` `Readonly` **\_uri**: `string`
|
• `Private` `Readonly` **\_options**: [`ConnectionOptions`](../interfaces/ConnectionOptions.md)
|
||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:129](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L129)
|
[index.ts:181](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L181)
|
||||||
|
|
||||||
## Accessors
|
## Accessors
|
||||||
|
|
||||||
@@ -84,7 +84,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:137](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L137)
|
[index.ts:189](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L189)
|
||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
@@ -112,7 +112,7 @@ Creates a new Table and initialize it with new data.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:177](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L177)
|
[index.ts:230](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L230)
|
||||||
|
|
||||||
▸ **createTable**(`name`, `data`, `mode`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
▸ **createTable**(`name`, `data`, `mode`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\>
|
||||||
|
|
||||||
@@ -134,7 +134,7 @@ Connection.createTable
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:178](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L178)
|
[index.ts:231](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L231)
|
||||||
|
|
||||||
▸ **createTable**<`T`\>(`name`, `data`, `mode`, `embeddings`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
▸ **createTable**<`T`\>(`name`, `data`, `mode`, `embeddings`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||||
|
|
||||||
@@ -165,7 +165,36 @@ Connection.createTable
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:188](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L188)
|
[index.ts:241](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L241)
|
||||||
|
|
||||||
|
▸ **createTable**<`T`\>(`name`, `data`, `mode`, `embeddings?`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
#### Type parameters
|
||||||
|
|
||||||
|
| Name |
|
||||||
|
| :------ |
|
||||||
|
| `T` |
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `name` | `string` |
|
||||||
|
| `data` | `Record`<`string`, `unknown`\>[] |
|
||||||
|
| `mode` | [`WriteMode`](../enums/WriteMode.md) |
|
||||||
|
| `embeddings?` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
Connection.createTable
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:242](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L242)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -190,7 +219,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:201](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L201)
|
[index.ts:266](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L266)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -216,7 +245,7 @@ Drop an existing table.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:211](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L211)
|
[index.ts:276](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L276)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -242,7 +271,7 @@ Open a table in the database.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:153](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L153)
|
[index.ts:205](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L205)
|
||||||
|
|
||||||
▸ **openTable**<`T`\>(`name`, `embeddings`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
▸ **openTable**<`T`\>(`name`, `embeddings`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||||
|
|
||||||
@@ -271,7 +300,34 @@ Connection.openTable
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:160](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L160)
|
[index.ts:212](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L212)
|
||||||
|
|
||||||
|
▸ **openTable**<`T`\>(`name`, `embeddings?`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
#### Type parameters
|
||||||
|
|
||||||
|
| Name |
|
||||||
|
| :------ |
|
||||||
|
| `T` |
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `name` | `string` |
|
||||||
|
| `embeddings?` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Table`](../interfaces/Table.md)<`T`\>\>
|
||||||
|
|
||||||
|
#### Implementation of
|
||||||
|
|
||||||
|
Connection.openTable
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:213](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L213)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -291,4 +347,4 @@ Get the names of all tables in the database.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:144](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L144)
|
[index.ts:196](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L196)
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
|
|||||||
|
|
||||||
- [\_embeddings](LocalTable.md#_embeddings)
|
- [\_embeddings](LocalTable.md#_embeddings)
|
||||||
- [\_name](LocalTable.md#_name)
|
- [\_name](LocalTable.md#_name)
|
||||||
|
- [\_options](LocalTable.md#_options)
|
||||||
- [\_tbl](LocalTable.md#_tbl)
|
- [\_tbl](LocalTable.md#_tbl)
|
||||||
|
|
||||||
### Accessors
|
### Accessors
|
||||||
@@ -43,7 +44,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
|
|||||||
|
|
||||||
### constructor
|
### constructor
|
||||||
|
|
||||||
• **new LocalTable**<`T`\>(`tbl`, `name`)
|
• **new LocalTable**<`T`\>(`tbl`, `name`, `options`)
|
||||||
|
|
||||||
#### Type parameters
|
#### Type parameters
|
||||||
|
|
||||||
@@ -57,12 +58,13 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
|
|||||||
| :------ | :------ |
|
| :------ | :------ |
|
||||||
| `tbl` | `any` |
|
| `tbl` | `any` |
|
||||||
| `name` | `string` |
|
| `name` | `string` |
|
||||||
|
| `options` | [`ConnectionOptions`](../interfaces/ConnectionOptions.md) |
|
||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:221](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L221)
|
[index.ts:287](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L287)
|
||||||
|
|
||||||
• **new LocalTable**<`T`\>(`tbl`, `name`, `embeddings`)
|
• **new LocalTable**<`T`\>(`tbl`, `name`, `options`, `embeddings`)
|
||||||
|
|
||||||
#### Type parameters
|
#### Type parameters
|
||||||
|
|
||||||
@@ -76,11 +78,12 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
|
|||||||
| :------ | :------ | :------ |
|
| :------ | :------ | :------ |
|
||||||
| `tbl` | `any` | |
|
| `tbl` | `any` | |
|
||||||
| `name` | `string` | |
|
| `name` | `string` | |
|
||||||
|
| `options` | [`ConnectionOptions`](../interfaces/ConnectionOptions.md) | |
|
||||||
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use when interacting with this table |
|
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use when interacting with this table |
|
||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:227](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L227)
|
[index.ts:294](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L294)
|
||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
@@ -90,7 +93,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:219](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L219)
|
[index.ts:284](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L284)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -100,7 +103,17 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:218](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L218)
|
[index.ts:283](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L283)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### \_options
|
||||||
|
|
||||||
|
• `Private` `Readonly` **\_options**: [`ConnectionOptions`](../interfaces/ConnectionOptions.md)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:285](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L285)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -110,7 +123,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:217](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L217)
|
[index.ts:282](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L282)
|
||||||
|
|
||||||
## Accessors
|
## Accessors
|
||||||
|
|
||||||
@@ -128,7 +141,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:234](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L234)
|
[index.ts:302](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L302)
|
||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
@@ -156,7 +169,7 @@ The number of rows added to the table
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:252](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L252)
|
[index.ts:320](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L320)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -176,7 +189,7 @@ Returns the number of rows in this table.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:278](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L278)
|
[index.ts:362](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L362)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -194,7 +207,7 @@ VectorIndexParams.
|
|||||||
|
|
||||||
| Name | Type | Description |
|
| Name | Type | Description |
|
||||||
| :------ | :------ | :------ |
|
| :------ | :------ | :------ |
|
||||||
| `indexParams` | `IvfPQIndexConfig` | The parameters of this Index, |
|
| `indexParams` | [`IvfPQIndexConfig`](../interfaces/IvfPQIndexConfig.md) | The parameters of this Index, |
|
||||||
|
|
||||||
#### Returns
|
#### Returns
|
||||||
|
|
||||||
@@ -206,7 +219,7 @@ VectorIndexParams.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:271](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L271)
|
[index.ts:355](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L355)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -232,7 +245,7 @@ Delete rows from this table.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:287](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L287)
|
[index.ts:371](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L371)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -260,7 +273,7 @@ The number of rows added to the table
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:262](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L262)
|
[index.ts:338](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L338)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -286,4 +299,4 @@ Creates a search query to find the nearest neighbors of the given search term
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:242](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L242)
|
[index.ts:310](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L310)
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ An embedding function that automatically creates vector representation for a giv
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/openai.ts:21](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/openai.ts#L21)
|
[embedding/openai.ts:21](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/openai.ts#L21)
|
||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
@@ -50,7 +50,7 @@ An embedding function that automatically creates vector representation for a giv
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/openai.ts:19](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/openai.ts#L19)
|
[embedding/openai.ts:19](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/openai.ts#L19)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -60,7 +60,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/openai.ts:18](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/openai.ts#L18)
|
[embedding/openai.ts:18](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/openai.ts#L18)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -76,7 +76,7 @@ The name of the column that will be used as input for the Embedding Function.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/openai.ts:50](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/openai.ts#L50)
|
[embedding/openai.ts:50](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/openai.ts#L50)
|
||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
@@ -102,4 +102,4 @@ Creates a vector representation for the given values.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/openai.ts:38](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/openai.ts#L38)
|
[embedding/openai.ts:38](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/openai.ts#L38)
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ A builder for nearest neighbor queries for LanceDB.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:362](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L362)
|
[index.ts:448](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L448)
|
||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
@@ -72,7 +72,7 @@ A builder for nearest neighbor queries for LanceDB.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:360](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L360)
|
[index.ts:446](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L446)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -82,7 +82,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:358](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L358)
|
[index.ts:444](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L444)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -92,7 +92,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:354](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L354)
|
[index.ts:440](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L440)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -102,7 +102,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:359](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L359)
|
[index.ts:445](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L445)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -112,7 +112,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:356](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L356)
|
[index.ts:442](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L442)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -122,7 +122,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:352](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L352)
|
[index.ts:438](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L438)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -132,7 +132,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:353](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L353)
|
[index.ts:439](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L439)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -142,7 +142,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:355](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L355)
|
[index.ts:441](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L441)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -152,7 +152,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:357](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L357)
|
[index.ts:443](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L443)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -162,7 +162,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:351](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L351)
|
[index.ts:437](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L437)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -188,7 +188,7 @@ A filter statement to be applied to this query.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:410](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L410)
|
[index.ts:496](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L496)
|
||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
@@ -210,7 +210,7 @@ Execute the query and return the results as an Array of Objects
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:433](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L433)
|
[index.ts:519](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L519)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -232,7 +232,7 @@ A filter statement to be applied to this query.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:405](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L405)
|
[index.ts:491](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L491)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -254,7 +254,7 @@ Sets the number of results that will be returned
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:378](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L378)
|
[index.ts:464](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L464)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -280,7 +280,7 @@ MetricType for the different options
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:425](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L425)
|
[index.ts:511](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L511)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -302,7 +302,7 @@ The number of probes used. A higher number makes search more accurate but also s
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:396](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L396)
|
[index.ts:482](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L482)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -324,7 +324,7 @@ Refine the results by reading extra elements and re-ranking them in memory.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:387](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L387)
|
[index.ts:473](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L473)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -346,4 +346,4 @@ Return only the specified columns.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:416](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L416)
|
[index.ts:502](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L502)
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ Cosine distance
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:481](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L481)
|
[index.ts:567](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L567)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -34,7 +34,7 @@ Dot product
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:486](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L486)
|
[index.ts:572](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L572)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -46,4 +46,4 @@ Euclidean distance
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:476](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L476)
|
[index.ts:562](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L562)
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ Append new data to the table.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:466](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L466)
|
[index.ts:552](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L552)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -34,7 +34,7 @@ Create a new [Table](../interfaces/Table.md).
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:462](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L462)
|
[index.ts:548](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L548)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -46,4 +46,4 @@ Overwrite the existing [Table](../interfaces/Table.md) if presented.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:464](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L464)
|
[index.ts:550](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L550)
|
||||||
|
|||||||
41
docs/src/javascript/interfaces/AwsCredentials.md
Normal file
41
docs/src/javascript/interfaces/AwsCredentials.md
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
[vectordb](../README.md) / [Exports](../modules.md) / AwsCredentials
|
||||||
|
|
||||||
|
# Interface: AwsCredentials
|
||||||
|
|
||||||
|
## Table of contents
|
||||||
|
|
||||||
|
### Properties
|
||||||
|
|
||||||
|
- [accessKeyId](AwsCredentials.md#accesskeyid)
|
||||||
|
- [secretKey](AwsCredentials.md#secretkey)
|
||||||
|
- [sessionToken](AwsCredentials.md#sessiontoken)
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### accessKeyId
|
||||||
|
|
||||||
|
• **accessKeyId**: `string`
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:31](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L31)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### secretKey
|
||||||
|
|
||||||
|
• **secretKey**: `string`
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:33](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L33)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### sessionToken
|
||||||
|
|
||||||
|
• `Optional` **sessionToken**: `string`
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:35](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L35)
|
||||||
@@ -32,7 +32,7 @@ Connection could be local against filesystem or remote against a server.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:45](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L45)
|
[index.ts:70](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L70)
|
||||||
|
|
||||||
## Methods
|
## Methods
|
||||||
|
|
||||||
@@ -63,7 +63,7 @@ Creates a new Table and initialize it with new data.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:65](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L65)
|
[index.ts:90](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L90)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -84,7 +84,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:67](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L67)
|
[index.ts:92](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L92)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -106,7 +106,7 @@ Drop an existing table.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:73](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L73)
|
[index.ts:98](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L98)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -135,7 +135,7 @@ Open a table in the database.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:55](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L55)
|
[index.ts:80](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L80)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -149,4 +149,4 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:47](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L47)
|
[index.ts:72](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L72)
|
||||||
|
|||||||
30
docs/src/javascript/interfaces/ConnectionOptions.md
Normal file
30
docs/src/javascript/interfaces/ConnectionOptions.md
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
[vectordb](../README.md) / [Exports](../modules.md) / ConnectionOptions
|
||||||
|
|
||||||
|
# Interface: ConnectionOptions
|
||||||
|
|
||||||
|
## Table of contents
|
||||||
|
|
||||||
|
### Properties
|
||||||
|
|
||||||
|
- [awsCredentials](ConnectionOptions.md#awscredentials)
|
||||||
|
- [uri](ConnectionOptions.md#uri)
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### awsCredentials
|
||||||
|
|
||||||
|
• `Optional` **awsCredentials**: [`AwsCredentials`](AwsCredentials.md)
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:40](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L40)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### uri
|
||||||
|
|
||||||
|
• **uri**: `string`
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:39](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L39)
|
||||||
@@ -45,7 +45,7 @@ Creates a vector representation for the given values.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/embedding_function.ts:27](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/embedding_function.ts#L27)
|
[embedding/embedding_function.ts:27](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/embedding_function.ts#L27)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -57,4 +57,4 @@ The name of the column that will be used as input for the Embedding Function.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[embedding/embedding_function.ts:22](https://github.com/lancedb/lancedb/blob/7247834/node/src/embedding/embedding_function.ts#L22)
|
[embedding/embedding_function.ts:22](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/embedding/embedding_function.ts#L22)
|
||||||
|
|||||||
149
docs/src/javascript/interfaces/IvfPQIndexConfig.md
Normal file
149
docs/src/javascript/interfaces/IvfPQIndexConfig.md
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
[vectordb](../README.md) / [Exports](../modules.md) / IvfPQIndexConfig
|
||||||
|
|
||||||
|
# Interface: IvfPQIndexConfig
|
||||||
|
|
||||||
|
## Table of contents
|
||||||
|
|
||||||
|
### Properties
|
||||||
|
|
||||||
|
- [column](IvfPQIndexConfig.md#column)
|
||||||
|
- [index\_name](IvfPQIndexConfig.md#index_name)
|
||||||
|
- [max\_iters](IvfPQIndexConfig.md#max_iters)
|
||||||
|
- [max\_opq\_iters](IvfPQIndexConfig.md#max_opq_iters)
|
||||||
|
- [metric\_type](IvfPQIndexConfig.md#metric_type)
|
||||||
|
- [num\_bits](IvfPQIndexConfig.md#num_bits)
|
||||||
|
- [num\_partitions](IvfPQIndexConfig.md#num_partitions)
|
||||||
|
- [num\_sub\_vectors](IvfPQIndexConfig.md#num_sub_vectors)
|
||||||
|
- [replace](IvfPQIndexConfig.md#replace)
|
||||||
|
- [type](IvfPQIndexConfig.md#type)
|
||||||
|
- [use\_opq](IvfPQIndexConfig.md#use_opq)
|
||||||
|
|
||||||
|
## Properties
|
||||||
|
|
||||||
|
### column
|
||||||
|
|
||||||
|
• `Optional` **column**: `string`
|
||||||
|
|
||||||
|
The column to be indexed
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:382](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L382)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### index\_name
|
||||||
|
|
||||||
|
• `Optional` **index\_name**: `string`
|
||||||
|
|
||||||
|
A unique name for the index
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:387](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L387)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### max\_iters
|
||||||
|
|
||||||
|
• `Optional` **max\_iters**: `number`
|
||||||
|
|
||||||
|
The max number of iterations for kmeans training.
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:402](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L402)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### max\_opq\_iters
|
||||||
|
|
||||||
|
• `Optional` **max\_opq\_iters**: `number`
|
||||||
|
|
||||||
|
Max number of iterations to train OPQ, if `use_opq` is true.
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:421](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L421)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### metric\_type
|
||||||
|
|
||||||
|
• `Optional` **metric\_type**: [`MetricType`](../enums/MetricType.md)
|
||||||
|
|
||||||
|
Metric type, L2 or Cosine
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:392](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L392)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### num\_bits
|
||||||
|
|
||||||
|
• `Optional` **num\_bits**: `number`
|
||||||
|
|
||||||
|
The number of bits used to represent one PQ centroid.
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:416](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L416)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### num\_partitions
|
||||||
|
|
||||||
|
• `Optional` **num\_partitions**: `number`
|
||||||
|
|
||||||
|
The number of partitions of this index.
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:397](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L397)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### num\_sub\_vectors
|
||||||
|
|
||||||
|
• `Optional` **num\_sub\_vectors**: `number`
|
||||||
|
|
||||||
|
Number of subvectors to build PQ code
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:412](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L412)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### replace
|
||||||
|
|
||||||
|
• `Optional` **replace**: `boolean`
|
||||||
|
|
||||||
|
Replace an existing index with the same name if it exists.
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:426](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L426)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### type
|
||||||
|
|
||||||
|
• **type**: ``"ivf_pq"``
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:428](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L428)
|
||||||
|
|
||||||
|
___
|
||||||
|
|
||||||
|
### use\_opq
|
||||||
|
|
||||||
|
• `Optional` **use\_opq**: `boolean`
|
||||||
|
|
||||||
|
Train as optimized product quantization.
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:407](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L407)
|
||||||
@@ -52,7 +52,7 @@ The number of rows added to the table
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:95](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L95)
|
[index.ts:120](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L120)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -72,13 +72,13 @@ Returns the number of rows in this table.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:115](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L115)
|
[index.ts:140](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L140)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
### createIndex
|
### createIndex
|
||||||
|
|
||||||
• **createIndex**: (`indexParams`: `IvfPQIndexConfig`) => `Promise`<`any`\>
|
• **createIndex**: (`indexParams`: [`IvfPQIndexConfig`](IvfPQIndexConfig.md)) => `Promise`<`any`\>
|
||||||
|
|
||||||
#### Type declaration
|
#### Type declaration
|
||||||
|
|
||||||
@@ -94,7 +94,7 @@ VectorIndexParams.
|
|||||||
|
|
||||||
| Name | Type | Description |
|
| Name | Type | Description |
|
||||||
| :------ | :------ | :------ |
|
| :------ | :------ | :------ |
|
||||||
| `indexParams` | `IvfPQIndexConfig` | The parameters of this Index, |
|
| `indexParams` | [`IvfPQIndexConfig`](IvfPQIndexConfig.md) | The parameters of this Index, |
|
||||||
|
|
||||||
##### Returns
|
##### Returns
|
||||||
|
|
||||||
@@ -102,7 +102,7 @@ VectorIndexParams.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:110](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L110)
|
[index.ts:135](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L135)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -116,11 +116,37 @@ ___
|
|||||||
|
|
||||||
Delete rows from this table.
|
Delete rows from this table.
|
||||||
|
|
||||||
|
This can be used to delete a single row, many rows, all rows, or
|
||||||
|
sometimes no rows (if your predicate matches nothing).
|
||||||
|
|
||||||
|
**`Examples`**
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const con = await lancedb.connect("./.lancedb")
|
||||||
|
const data = [
|
||||||
|
{id: 1, vector: [1, 2]},
|
||||||
|
{id: 2, vector: [3, 4]},
|
||||||
|
{id: 3, vector: [5, 6]},
|
||||||
|
];
|
||||||
|
const tbl = await con.createTable("my_table", data)
|
||||||
|
await tbl.delete("id = 2")
|
||||||
|
await tbl.countRows() // Returns 2
|
||||||
|
```
|
||||||
|
|
||||||
|
If you have a list of values to delete, you can combine them into a
|
||||||
|
stringified list and use the `IN` operator:
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const to_remove = [1, 5];
|
||||||
|
await tbl.delete(`id IN (${to_remove.join(",")})`)
|
||||||
|
await tbl.countRows() // Returns 1
|
||||||
|
```
|
||||||
|
|
||||||
##### Parameters
|
##### Parameters
|
||||||
|
|
||||||
| Name | Type | Description |
|
| Name | Type | Description |
|
||||||
| :------ | :------ | :------ |
|
| :------ | :------ | :------ |
|
||||||
| `filter` | `string` | A filter in the same format used by a sql WHERE clause. |
|
| `filter` | `string` | A filter in the same format used by a sql WHERE clause. The filter must not be empty. |
|
||||||
|
|
||||||
##### Returns
|
##### Returns
|
||||||
|
|
||||||
@@ -128,7 +154,7 @@ Delete rows from this table.
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:122](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L122)
|
[index.ts:174](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L174)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -138,7 +164,7 @@ ___
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:81](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L81)
|
[index.ts:106](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L106)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -166,7 +192,7 @@ The number of rows added to the table
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:103](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L103)
|
[index.ts:128](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L128)
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
@@ -192,4 +218,4 @@ Creates a search query to find the nearest neighbors of the given search term
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:87](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L87)
|
[index.ts:112](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L112)
|
||||||
|
|||||||
@@ -18,8 +18,11 @@
|
|||||||
|
|
||||||
### Interfaces
|
### Interfaces
|
||||||
|
|
||||||
|
- [AwsCredentials](interfaces/AwsCredentials.md)
|
||||||
- [Connection](interfaces/Connection.md)
|
- [Connection](interfaces/Connection.md)
|
||||||
|
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
||||||
- [EmbeddingFunction](interfaces/EmbeddingFunction.md)
|
- [EmbeddingFunction](interfaces/EmbeddingFunction.md)
|
||||||
|
- [IvfPQIndexConfig](interfaces/IvfPQIndexConfig.md)
|
||||||
- [Table](interfaces/Table.md)
|
- [Table](interfaces/Table.md)
|
||||||
|
|
||||||
### Type Aliases
|
### Type Aliases
|
||||||
@@ -34,11 +37,11 @@
|
|||||||
|
|
||||||
### VectorIndexParams
|
### VectorIndexParams
|
||||||
|
|
||||||
Ƭ **VectorIndexParams**: `IvfPQIndexConfig`
|
Ƭ **VectorIndexParams**: [`IvfPQIndexConfig`](interfaces/IvfPQIndexConfig.md)
|
||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:345](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L345)
|
[index.ts:431](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L431)
|
||||||
|
|
||||||
## Functions
|
## Functions
|
||||||
|
|
||||||
@@ -60,4 +63,20 @@ Connect to a LanceDB instance at the given URI
|
|||||||
|
|
||||||
#### Defined in
|
#### Defined in
|
||||||
|
|
||||||
[index.ts:34](https://github.com/lancedb/lancedb/blob/7247834/node/src/index.ts#L34)
|
[index.ts:47](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L47)
|
||||||
|
|
||||||
|
▸ **connect**(`opts`): `Promise`<[`Connection`](interfaces/Connection.md)\>
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Name | Type |
|
||||||
|
| :------ | :------ |
|
||||||
|
| `opts` | `Partial`<[`ConnectionOptions`](interfaces/ConnectionOptions.md)\> |
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
|
||||||
|
`Promise`<[`Connection`](interfaces/Connection.md)\>
|
||||||
|
|
||||||
|
#### Defined in
|
||||||
|
|
||||||
|
[index.ts:48](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L48)
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ Built on top of [Apache Arrow](https://arrow.apache.org/),
|
|||||||
`LanceDB` is easy to integrate with the Python ecosystem, including [Pandas](https://pandas.pydata.org/)
|
`LanceDB` is easy to integrate with the Python ecosystem, including [Pandas](https://pandas.pydata.org/)
|
||||||
and PyArrow.
|
and PyArrow.
|
||||||
|
|
||||||
|
## Create dataset
|
||||||
|
|
||||||
First, we need to connect to a `LanceDB` database.
|
First, we need to connect to a `LanceDB` database.
|
||||||
|
|
||||||
```py
|
```py
|
||||||
@@ -27,10 +29,42 @@ data = pd.DataFrame({
|
|||||||
table = db.create_table("pd_table", data=data)
|
table = db.create_table("pd_table", data=data)
|
||||||
```
|
```
|
||||||
|
|
||||||
You will find detailed instructions of creating dataset and index in
|
Similar to [`pyarrow.write_dataset()`](https://arrow.apache.org/docs/python/generated/pyarrow.dataset.write_dataset.html),
|
||||||
[Basic Operations](basic.md) and [Indexing](ann_indexes.md)
|
[db.create_table()](../python/#lancedb.db.DBConnection.create_table) accepts a wide range of data formats.
|
||||||
|
|
||||||
|
For example, if you have a dataset that is larger than memory, you can create a table from an `Iterator[pyarrow.RecordBatch]`,
|
||||||
|
to lazily generate data:
|
||||||
|
|
||||||
|
```py
|
||||||
|
|
||||||
|
from typing import Iterable
|
||||||
|
import pyarrow as pa
|
||||||
|
import lancedb
|
||||||
|
|
||||||
|
def make_batches() -> Iterable[pa.RecordBatch]:
|
||||||
|
for i in range(5):
|
||||||
|
yield pa.RecordBatch.from_arrays(
|
||||||
|
[
|
||||||
|
pa.array([[3.1, 4.1], [5.9, 26.5]]),
|
||||||
|
pa.array(["foo", "bar"]),
|
||||||
|
pa.array([10.0, 20.0]),
|
||||||
|
],
|
||||||
|
["vector", "item", "price"])
|
||||||
|
|
||||||
|
schema=pa.schema([
|
||||||
|
pa.field("vector", pa.list_(pa.float32())),
|
||||||
|
pa.field("item", pa.utf8()),
|
||||||
|
pa.field("price", pa.float32()),
|
||||||
|
])
|
||||||
|
|
||||||
|
table = db.create_table("iterable_table", data=make_batches(), schema=schema)
|
||||||
|
```
|
||||||
|
|
||||||
|
You will find detailed instructions for creating a dataset in the
|
||||||
|
[Basic Operations](../basic.md) and [API](../python/#lancedb.db.DBConnection.create_table)
|
||||||
sections.
|
sections.
|
||||||
|
|
||||||
|
## Vector Search
|
||||||
|
|
||||||
We can now perform similarity search via `LanceDB` Python API.
|
We can now perform similarity search via `LanceDB` Python API.
|
||||||
|
|
||||||
|
|||||||
35
docs/src/python/pydantic.md
Normal file
35
docs/src/python/pydantic.md
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Pydantic
|
||||||
|
|
||||||
|
[Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
|
||||||
|
|
||||||
|
## Schema
|
||||||
|
|
||||||
|
LanceDB supports creating an Apache Arrow Schema from a
|
||||||
|
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
|
||||||
|
via the [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
|
||||||
|
|
||||||
|
::: lancedb.pydantic.pydantic_to_schema
|
||||||
|
|
||||||
|
## Vector Field
|
||||||
|
|
||||||
|
LanceDB provides a [`vector(dim)`](python.md#lancedb.pydantic.vector) method to define a
|
||||||
|
vector Field in a Pydantic Model.
|
||||||
|
|
||||||
|
::: lancedb.pydantic.vector
|
||||||
|
|
||||||
|
## Type Conversion
|
||||||
|
|
||||||
|
LanceDB automatically converts Pydantic fields to
|
||||||
|
[Apache Arrow DataType](https://arrow.apache.org/docs/python/generated/pyarrow.DataType.html#pyarrow.DataType).
|
||||||
|
|
||||||
|
Currently supported type conversions:
|
||||||
|
|
||||||
|
| Pydantic Field Type | PyArrow Data Type |
|
||||||
|
| ------------------- | ----------------- |
|
||||||
|
| `int` | `pyarrow.int64` |
|
||||||
|
| `float` | `pyarrow.float64` |
|
||||||
|
| `bool` | `pyarrow.bool_` |
|
||||||
|
| `str` | `pyarrow.utf8()` |
|
||||||
|
| `list` | `pyarrow.List` |
|
||||||
|
| `BaseModel` | `pyarrow.Struct` |
|
||||||
|
| `vector(n)` | `pyarrow.FixedSizeList(float32, n)` |
|
||||||
@@ -46,10 +46,6 @@ pip install lancedb
|
|||||||
|
|
||||||
## Utilities
|
## Utilities
|
||||||
|
|
||||||
::: lancedb.schema.schema_to_dict
|
|
||||||
|
|
||||||
::: lancedb.schema.dict_to_schema
|
|
||||||
|
|
||||||
::: lancedb.vector
|
::: lancedb.vector
|
||||||
|
|
||||||
## Integrations
|
## Integrations
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ const excludedFiles = [
|
|||||||
"../src/embedding.md",
|
"../src/embedding.md",
|
||||||
"../src/examples/serverless_lancedb_with_s3_and_lambda.md",
|
"../src/examples/serverless_lancedb_with_s3_and_lambda.md",
|
||||||
"../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
|
"../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
|
||||||
|
"../src/examples/transformerjs_embedding_search_nodejs.md",
|
||||||
"../src/examples/youtube_transcript_bot_with_nodejs.md",
|
"../src/examples/youtube_transcript_bot_with_nodejs.md",
|
||||||
];
|
];
|
||||||
const nodePrefix = "javascript";
|
const nodePrefix = "javascript";
|
||||||
@@ -48,4 +49,4 @@ for (const file of files.filter((file) => !excludedFiles.includes(file))) {
|
|||||||
fs.mkdirSync(path.dirname(outPath), { recursive: true });
|
fs.mkdirSync(path.dirname(outPath), { recursive: true });
|
||||||
fs.writeFileSync(outPath, asyncPrefix + "\n" + lines.join("\n") + asyncSuffix);
|
fs.writeFileSync(outPath, asyncPrefix + "\n" + lines.join("\n") + asyncSuffix);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
66
node/examples/js-transformers/index.js
Normal file
66
node/examples/js-transformers/index.js
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
// Copyright 2023 Lance Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
'use strict'
|
||||||
|
|
||||||
|
|
||||||
|
async function example() {
|
||||||
|
|
||||||
|
const lancedb = require('vectordb')
|
||||||
|
|
||||||
|
// Import transformers and the all-MiniLM-L6-v2 model (https://huggingface.co/Xenova/all-MiniLM-L6-v2)
|
||||||
|
const { pipeline } = await import('@xenova/transformers')
|
||||||
|
const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
||||||
|
|
||||||
|
|
||||||
|
// Create embedding function from pipeline which returns a list of vectors from batch
|
||||||
|
// sourceColumn is the name of the column in the data to be embedded
|
||||||
|
//
|
||||||
|
// Output of pipe is a Tensor { data: Float32Array(384) }, so filter for the vector
|
||||||
|
const embed_fun = {}
|
||||||
|
embed_fun.sourceColumn = 'text'
|
||||||
|
embed_fun.embed = async function (batch) {
|
||||||
|
let result = []
|
||||||
|
for (let text of batch) {
|
||||||
|
const res = await pipe(text, { pooling: 'mean', normalize: true })
|
||||||
|
result.push(Array.from(res['data']))
|
||||||
|
}
|
||||||
|
return (result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Link a folder and create a table with data
|
||||||
|
const db = await lancedb.connect('data/sample-lancedb')
|
||||||
|
|
||||||
|
const data = [
|
||||||
|
{ id: 1, text: 'Cherry', type: 'fruit' },
|
||||||
|
{ id: 2, text: 'Carrot', type: 'vegetable' },
|
||||||
|
{ id: 3, text: 'Potato', type: 'vegetable' },
|
||||||
|
{ id: 4, text: 'Apple', type: 'fruit' },
|
||||||
|
{ id: 5, text: 'Banana', type: 'fruit' }
|
||||||
|
]
|
||||||
|
|
||||||
|
const table = await db.createTable('food_table', data, "create", embed_fun)
|
||||||
|
|
||||||
|
|
||||||
|
// Query the table
|
||||||
|
const results = await table
|
||||||
|
.search("a sweet fruit to eat")
|
||||||
|
.metricType("cosine")
|
||||||
|
.limit(2)
|
||||||
|
.execute()
|
||||||
|
console.log(results.map(r => r.text))
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
example().then(_ => { console.log("Done!") })
|
||||||
16
node/examples/js-transformers/package.json
Normal file
16
node/examples/js-transformers/package.json
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
{
|
||||||
|
"name": "vectordb-example-js-transformers",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "Example for using transformers.js with lancedb",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
|
},
|
||||||
|
"author": "Lance Devs",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@xenova/transformers": "^2.4.1",
|
||||||
|
"vectordb": "^0.1.12"
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
95
node/package-lock.json
generated
95
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.1.12",
|
"version": "0.1.15",
|
||||||
"lockfileVersion": 2,
|
"lockfileVersion": 2,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.1.12",
|
"version": "0.1.15",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -14,12 +14,14 @@
|
|||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"os": [
|
"os": [
|
||||||
"darwin",
|
"darwin",
|
||||||
"linux"
|
"linux",
|
||||||
|
"win32"
|
||||||
],
|
],
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@apache-arrow/ts": "^12.0.0",
|
"@apache-arrow/ts": "^12.0.0",
|
||||||
"@neon-rs/load": "^0.0.74",
|
"@neon-rs/load": "^0.0.74",
|
||||||
"apache-arrow": "^12.0.0"
|
"apache-arrow": "^12.0.0",
|
||||||
|
"axios": "^1.4.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@neon-rs/cli": "^0.0.74",
|
"@neon-rs/cli": "^0.0.74",
|
||||||
@@ -49,10 +51,11 @@
|
|||||||
"typescript": "*"
|
"typescript": "*"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"vectordb-darwin-arm64": "0.1.12",
|
"vectordb-darwin-arm64": "0.1.15",
|
||||||
"vectordb-darwin-x64": "0.1.12",
|
"vectordb-darwin-x64": "0.1.15",
|
||||||
"vectordb-linux-arm64-gnu": "0.1.12",
|
"vectordb-linux-arm64-gnu": "0.1.15",
|
||||||
"vectordb-linux-x64-gnu": "0.1.12"
|
"vectordb-linux-x64-gnu": "0.1.15",
|
||||||
|
"vectordb-win32-x64-msvc": "0.1.15"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@apache-arrow/ts": {
|
"node_modules/@apache-arrow/ts": {
|
||||||
@@ -840,8 +843,7 @@
|
|||||||
"node_modules/asynckit": {
|
"node_modules/asynckit": {
|
||||||
"version": "0.4.0",
|
"version": "0.4.0",
|
||||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
|
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||||
"dev": true
|
|
||||||
},
|
},
|
||||||
"node_modules/available-typed-arrays": {
|
"node_modules/available-typed-arrays": {
|
||||||
"version": "1.0.5",
|
"version": "1.0.5",
|
||||||
@@ -856,12 +858,13 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/axios": {
|
"node_modules/axios": {
|
||||||
"version": "0.26.1",
|
"version": "1.4.0",
|
||||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
|
"resolved": "https://registry.npmjs.org/axios/-/axios-1.4.0.tgz",
|
||||||
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
|
"integrity": "sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==",
|
||||||
"dev": true,
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"follow-redirects": "^1.14.8"
|
"follow-redirects": "^1.15.0",
|
||||||
|
"form-data": "^4.0.0",
|
||||||
|
"proxy-from-env": "^1.1.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/balanced-match": {
|
"node_modules/balanced-match": {
|
||||||
@@ -1092,7 +1095,6 @@
|
|||||||
"version": "1.0.8",
|
"version": "1.0.8",
|
||||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||||
"dev": true,
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"delayed-stream": "~1.0.0"
|
"delayed-stream": "~1.0.0"
|
||||||
},
|
},
|
||||||
@@ -1315,7 +1317,6 @@
|
|||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
||||||
"dev": true,
|
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=0.4.0"
|
"node": ">=0.4.0"
|
||||||
}
|
}
|
||||||
@@ -2082,7 +2083,6 @@
|
|||||||
"version": "1.15.2",
|
"version": "1.15.2",
|
||||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
||||||
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
|
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
|
||||||
"dev": true,
|
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
"type": "individual",
|
"type": "individual",
|
||||||
@@ -2111,7 +2111,6 @@
|
|||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||||
"dev": true,
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"asynckit": "^0.4.0",
|
"asynckit": "^0.4.0",
|
||||||
"combined-stream": "^1.0.8",
|
"combined-stream": "^1.0.8",
|
||||||
@@ -2985,7 +2984,6 @@
|
|||||||
"version": "1.52.0",
|
"version": "1.52.0",
|
||||||
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
||||||
"dev": true,
|
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">= 0.6"
|
"node": ">= 0.6"
|
||||||
}
|
}
|
||||||
@@ -2994,7 +2992,6 @@
|
|||||||
"version": "2.1.35",
|
"version": "2.1.35",
|
||||||
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
||||||
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||||
"dev": true,
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"mime-db": "1.52.0"
|
"mime-db": "1.52.0"
|
||||||
},
|
},
|
||||||
@@ -3288,6 +3285,15 @@
|
|||||||
"form-data": "^4.0.0"
|
"form-data": "^4.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/openai/node_modules/axios": {
|
||||||
|
"version": "0.26.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
|
||||||
|
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"follow-redirects": "^1.14.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/optionator": {
|
"node_modules/optionator": {
|
||||||
"version": "0.9.1",
|
"version": "0.9.1",
|
||||||
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz",
|
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz",
|
||||||
@@ -3439,6 +3445,11 @@
|
|||||||
"node": ">= 0.8.0"
|
"node": ">= 0.8.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/proxy-from-env": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
||||||
|
},
|
||||||
"node_modules/punycode": {
|
"node_modules/punycode": {
|
||||||
"version": "2.3.0",
|
"version": "2.3.0",
|
||||||
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
|
||||||
@@ -5097,8 +5108,7 @@
|
|||||||
"asynckit": {
|
"asynckit": {
|
||||||
"version": "0.4.0",
|
"version": "0.4.0",
|
||||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
|
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||||
"dev": true
|
|
||||||
},
|
},
|
||||||
"available-typed-arrays": {
|
"available-typed-arrays": {
|
||||||
"version": "1.0.5",
|
"version": "1.0.5",
|
||||||
@@ -5107,12 +5117,13 @@
|
|||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"axios": {
|
"axios": {
|
||||||
"version": "0.26.1",
|
"version": "1.4.0",
|
||||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
|
"resolved": "https://registry.npmjs.org/axios/-/axios-1.4.0.tgz",
|
||||||
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
|
"integrity": "sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==",
|
||||||
"dev": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"follow-redirects": "^1.14.8"
|
"follow-redirects": "^1.15.0",
|
||||||
|
"form-data": "^4.0.0",
|
||||||
|
"proxy-from-env": "^1.1.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"balanced-match": {
|
"balanced-match": {
|
||||||
@@ -5292,7 +5303,6 @@
|
|||||||
"version": "1.0.8",
|
"version": "1.0.8",
|
||||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||||
"dev": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"delayed-stream": "~1.0.0"
|
"delayed-stream": "~1.0.0"
|
||||||
}
|
}
|
||||||
@@ -5459,8 +5469,7 @@
|
|||||||
"delayed-stream": {
|
"delayed-stream": {
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ=="
|
||||||
"dev": true
|
|
||||||
},
|
},
|
||||||
"diff": {
|
"diff": {
|
||||||
"version": "4.0.2",
|
"version": "4.0.2",
|
||||||
@@ -6030,8 +6039,7 @@
|
|||||||
"follow-redirects": {
|
"follow-redirects": {
|
||||||
"version": "1.15.2",
|
"version": "1.15.2",
|
||||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
||||||
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
|
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA=="
|
||||||
"dev": true
|
|
||||||
},
|
},
|
||||||
"for-each": {
|
"for-each": {
|
||||||
"version": "0.3.3",
|
"version": "0.3.3",
|
||||||
@@ -6046,7 +6054,6 @@
|
|||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||||
"dev": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"asynckit": "^0.4.0",
|
"asynckit": "^0.4.0",
|
||||||
"combined-stream": "^1.0.8",
|
"combined-stream": "^1.0.8",
|
||||||
@@ -6660,14 +6667,12 @@
|
|||||||
"mime-db": {
|
"mime-db": {
|
||||||
"version": "1.52.0",
|
"version": "1.52.0",
|
||||||
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="
|
||||||
"dev": true
|
|
||||||
},
|
},
|
||||||
"mime-types": {
|
"mime-types": {
|
||||||
"version": "2.1.35",
|
"version": "2.1.35",
|
||||||
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
||||||
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||||
"dev": true,
|
|
||||||
"requires": {
|
"requires": {
|
||||||
"mime-db": "1.52.0"
|
"mime-db": "1.52.0"
|
||||||
}
|
}
|
||||||
@@ -6893,6 +6898,17 @@
|
|||||||
"requires": {
|
"requires": {
|
||||||
"axios": "^0.26.0",
|
"axios": "^0.26.0",
|
||||||
"form-data": "^4.0.0"
|
"form-data": "^4.0.0"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"axios": {
|
||||||
|
"version": "0.26.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
|
||||||
|
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
|
||||||
|
"dev": true,
|
||||||
|
"requires": {
|
||||||
|
"follow-redirects": "^1.14.8"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionator": {
|
"optionator": {
|
||||||
@@ -7001,6 +7017,11 @@
|
|||||||
"integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==",
|
"integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"proxy-from-env": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
||||||
|
},
|
||||||
"punycode": {
|
"punycode": {
|
||||||
"version": "2.3.0",
|
"version": "2.3.0",
|
||||||
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.1.13",
|
"version": "0.1.15",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
@@ -8,7 +8,7 @@
|
|||||||
"tsc": "tsc -b",
|
"tsc": "tsc -b",
|
||||||
"build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json",
|
"build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json",
|
||||||
"build-release": "npm run build -- --release",
|
"build-release": "npm run build -- --release",
|
||||||
"test": "npm run tsc; mocha -recursive dist/test",
|
"test": "npm run tsc && mocha -recursive dist/test",
|
||||||
"lint": "eslint src --ext .js,.ts",
|
"lint": "eslint src --ext .js,.ts",
|
||||||
"clean": "rm -rf node_modules *.node dist/",
|
"clean": "rm -rf node_modules *.node dist/",
|
||||||
"pack-build": "neon pack-build",
|
"pack-build": "neon pack-build",
|
||||||
@@ -56,11 +56,13 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@apache-arrow/ts": "^12.0.0",
|
"@apache-arrow/ts": "^12.0.0",
|
||||||
"@neon-rs/load": "^0.0.74",
|
"@neon-rs/load": "^0.0.74",
|
||||||
"apache-arrow": "^12.0.0"
|
"apache-arrow": "^12.0.0",
|
||||||
|
"axios": "^1.4.0"
|
||||||
},
|
},
|
||||||
"os": [
|
"os": [
|
||||||
"darwin",
|
"darwin",
|
||||||
"linux"
|
"linux",
|
||||||
|
"win32"
|
||||||
],
|
],
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
@@ -71,13 +73,15 @@
|
|||||||
"x86_64-apple-darwin": "vectordb-darwin-x64",
|
"x86_64-apple-darwin": "vectordb-darwin-x64",
|
||||||
"aarch64-apple-darwin": "vectordb-darwin-arm64",
|
"aarch64-apple-darwin": "vectordb-darwin-arm64",
|
||||||
"x86_64-unknown-linux-gnu": "vectordb-linux-x64-gnu",
|
"x86_64-unknown-linux-gnu": "vectordb-linux-x64-gnu",
|
||||||
"aarch64-unknown-linux-gnu": "vectordb-linux-arm64-gnu"
|
"aarch64-unknown-linux-gnu": "vectordb-linux-arm64-gnu",
|
||||||
|
"x86_64-pc-windows-msvc": "vectordb-win32-x64-msvc"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"vectordb-darwin-arm64": "0.1.13",
|
"vectordb-darwin-arm64": "0.1.15",
|
||||||
"vectordb-darwin-x64": "0.1.13",
|
"vectordb-darwin-x64": "0.1.15",
|
||||||
"vectordb-linux-x64-gnu": "0.1.13",
|
"vectordb-linux-arm64-gnu": "0.1.15",
|
||||||
"vectordb-linux-arm64-gnu": "0.1.13"
|
"vectordb-linux-x64-gnu": "0.1.15",
|
||||||
|
"vectordb-win32-x64-msvc": "0.1.15"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,15 +14,15 @@
|
|||||||
|
|
||||||
import {
|
import {
|
||||||
RecordBatchFileWriter,
|
RecordBatchFileWriter,
|
||||||
type Table as ArrowTable,
|
type Table as ArrowTable
|
||||||
tableFromIPC,
|
|
||||||
Vector
|
|
||||||
} from 'apache-arrow'
|
} from 'apache-arrow'
|
||||||
import { fromRecordsToBuffer } from './arrow'
|
import { fromRecordsToBuffer } from './arrow'
|
||||||
import type { EmbeddingFunction } from './embedding/embedding_function'
|
import type { EmbeddingFunction } from './embedding/embedding_function'
|
||||||
|
import { RemoteConnection } from './remote'
|
||||||
|
import { Query } from './query'
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||||
const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete } = require('../native.js')
|
const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete } = require('../native.js')
|
||||||
|
|
||||||
export type { EmbeddingFunction }
|
export type { EmbeddingFunction }
|
||||||
export { OpenAIEmbeddingFunction } from './embedding/openai'
|
export { OpenAIEmbeddingFunction } from './embedding/openai'
|
||||||
@@ -37,7 +37,16 @@ export interface AwsCredentials {
|
|||||||
|
|
||||||
export interface ConnectionOptions {
|
export interface ConnectionOptions {
|
||||||
uri: string
|
uri: string
|
||||||
|
|
||||||
awsCredentials?: AwsCredentials
|
awsCredentials?: AwsCredentials
|
||||||
|
|
||||||
|
// API key for the remote connections
|
||||||
|
apiKey?: string
|
||||||
|
// Region to connect to
|
||||||
|
region?: string
|
||||||
|
|
||||||
|
// override the host for the remote connections
|
||||||
|
hostOverride?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -54,9 +63,16 @@ export async function connect (arg: string | Partial<ConnectionOptions>): Promis
|
|||||||
// opts = { uri: arg.uri, awsCredentials = arg.awsCredentials }
|
// opts = { uri: arg.uri, awsCredentials = arg.awsCredentials }
|
||||||
opts = Object.assign({
|
opts = Object.assign({
|
||||||
uri: '',
|
uri: '',
|
||||||
awsCredentials: undefined
|
awsCredentials: undefined,
|
||||||
|
apiKey: undefined,
|
||||||
|
region: 'us-west-2'
|
||||||
}, arg)
|
}, arg)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (opts.uri.startsWith('db://')) {
|
||||||
|
// Remote connection
|
||||||
|
return new RemoteConnection(opts)
|
||||||
|
}
|
||||||
const db = await databaseNew(opts.uri)
|
const db = await databaseNew(opts.uri)
|
||||||
return new LocalConnection(db, opts)
|
return new LocalConnection(db, opts)
|
||||||
}
|
}
|
||||||
@@ -142,7 +158,34 @@ export interface Table<T = number[]> {
|
|||||||
/**
|
/**
|
||||||
* Delete rows from this table.
|
* Delete rows from this table.
|
||||||
*
|
*
|
||||||
* @param filter A filter in the same format used by a sql WHERE clause.
|
* This can be used to delete a single row, many rows, all rows, or
|
||||||
|
* sometimes no rows (if your predicate matches nothing).
|
||||||
|
*
|
||||||
|
* @param filter A filter in the same format used by a sql WHERE clause. The
|
||||||
|
* filter must not be empty.
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
*
|
||||||
|
* ```ts
|
||||||
|
* const con = await lancedb.connect("./.lancedb")
|
||||||
|
* const data = [
|
||||||
|
* {id: 1, vector: [1, 2]},
|
||||||
|
* {id: 2, vector: [3, 4]},
|
||||||
|
* {id: 3, vector: [5, 6]},
|
||||||
|
* ];
|
||||||
|
* const tbl = await con.createTable("my_table", data)
|
||||||
|
* await tbl.delete("id = 2")
|
||||||
|
* await tbl.countRows() // Returns 2
|
||||||
|
* ```
|
||||||
|
*
|
||||||
|
* If you have a list of values to delete, you can combine them into a
|
||||||
|
* stringified list and use the `IN` operator:
|
||||||
|
*
|
||||||
|
* ```ts
|
||||||
|
* const to_remove = [1, 5];
|
||||||
|
* await tbl.delete(`id IN (${to_remove.join(",")})`)
|
||||||
|
* await tbl.countRows() // Returns 1
|
||||||
|
* ```
|
||||||
*/
|
*/
|
||||||
delete: (filter: string) => Promise<void>
|
delete: (filter: string) => Promise<void>
|
||||||
}
|
}
|
||||||
@@ -164,8 +207,8 @@ export class LocalConnection implements Connection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the names of all tables in the database.
|
* Get the names of all tables in the database.
|
||||||
*/
|
*/
|
||||||
async tableNames (): Promise<string[]> {
|
async tableNames (): Promise<string[]> {
|
||||||
return databaseTableNames.call(this._db)
|
return databaseTableNames.call(this._db)
|
||||||
}
|
}
|
||||||
@@ -176,6 +219,7 @@ export class LocalConnection implements Connection {
|
|||||||
* @param name The name of the table.
|
* @param name The name of the table.
|
||||||
*/
|
*/
|
||||||
async openTable (name: string): Promise<Table>
|
async openTable (name: string): Promise<Table>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Open a table in the database.
|
* Open a table in the database.
|
||||||
*
|
*
|
||||||
@@ -281,7 +325,7 @@ export class LocalTable<T = number[]> implements Table<T> {
|
|||||||
* @param query The query search term
|
* @param query The query search term
|
||||||
*/
|
*/
|
||||||
search (query: T): Query<T> {
|
search (query: T): Query<T> {
|
||||||
return new Query(this._tbl, query, this._embeddings)
|
return new Query(query, this._tbl, this._embeddings)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -403,116 +447,6 @@ export interface IvfPQIndexConfig {
|
|||||||
|
|
||||||
export type VectorIndexParams = IvfPQIndexConfig
|
export type VectorIndexParams = IvfPQIndexConfig
|
||||||
|
|
||||||
/**
|
|
||||||
* A builder for nearest neighbor queries for LanceDB.
|
|
||||||
*/
|
|
||||||
export class Query<T = number[]> {
|
|
||||||
private readonly _tbl: any
|
|
||||||
private readonly _query: T
|
|
||||||
private _queryVector?: number[]
|
|
||||||
private _limit: number
|
|
||||||
private _refineFactor?: number
|
|
||||||
private _nprobes: number
|
|
||||||
private _select?: string[]
|
|
||||||
private _filter?: string
|
|
||||||
private _metricType?: MetricType
|
|
||||||
private readonly _embeddings?: EmbeddingFunction<T>
|
|
||||||
|
|
||||||
constructor (tbl: any, query: T, embeddings?: EmbeddingFunction<T>) {
|
|
||||||
this._tbl = tbl
|
|
||||||
this._query = query
|
|
||||||
this._limit = 10
|
|
||||||
this._nprobes = 20
|
|
||||||
this._refineFactor = undefined
|
|
||||||
this._select = undefined
|
|
||||||
this._filter = undefined
|
|
||||||
this._metricType = undefined
|
|
||||||
this._embeddings = embeddings
|
|
||||||
}
|
|
||||||
|
|
||||||
/***
|
|
||||||
* Sets the number of results that will be returned
|
|
||||||
* @param value number of results
|
|
||||||
*/
|
|
||||||
limit (value: number): Query<T> {
|
|
||||||
this._limit = value
|
|
||||||
return this
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Refine the results by reading extra elements and re-ranking them in memory.
|
|
||||||
* @param value refine factor to use in this query.
|
|
||||||
*/
|
|
||||||
refineFactor (value: number): Query<T> {
|
|
||||||
this._refineFactor = value
|
|
||||||
return this
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The number of probes used. A higher number makes search more accurate but also slower.
|
|
||||||
* @param value The number of probes used.
|
|
||||||
*/
|
|
||||||
nprobes (value: number): Query<T> {
|
|
||||||
this._nprobes = value
|
|
||||||
return this
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A filter statement to be applied to this query.
|
|
||||||
* @param value A filter in the same format used by a sql WHERE clause.
|
|
||||||
*/
|
|
||||||
filter (value: string): Query<T> {
|
|
||||||
this._filter = value
|
|
||||||
return this
|
|
||||||
}
|
|
||||||
|
|
||||||
where = this.filter
|
|
||||||
|
|
||||||
/** Return only the specified columns.
|
|
||||||
*
|
|
||||||
* @param value Only select the specified columns. If not specified, all columns will be returned.
|
|
||||||
*/
|
|
||||||
select (value: string[]): Query<T> {
|
|
||||||
this._select = value
|
|
||||||
return this
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The MetricType used for this Query.
|
|
||||||
* @param value The metric to the. @see MetricType for the different options
|
|
||||||
*/
|
|
||||||
metricType (value: MetricType): Query<T> {
|
|
||||||
this._metricType = value
|
|
||||||
return this
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Execute the query and return the results as an Array of Objects
|
|
||||||
*/
|
|
||||||
async execute<T = Record<string, unknown>> (): Promise<T[]> {
|
|
||||||
if (this._embeddings !== undefined) {
|
|
||||||
this._queryVector = (await this._embeddings.embed([this._query]))[0]
|
|
||||||
} else {
|
|
||||||
this._queryVector = this._query as number[]
|
|
||||||
}
|
|
||||||
|
|
||||||
const buffer = await tableSearch.call(this._tbl, this)
|
|
||||||
const data = tableFromIPC(buffer)
|
|
||||||
|
|
||||||
return data.toArray().map((entry: Record<string, unknown>) => {
|
|
||||||
const newObject: Record<string, unknown> = {}
|
|
||||||
Object.keys(entry).forEach((key: string) => {
|
|
||||||
if (entry[key] instanceof Vector) {
|
|
||||||
newObject[key] = (entry[key] as Vector).toArray()
|
|
||||||
} else {
|
|
||||||
newObject[key] = entry[key]
|
|
||||||
}
|
|
||||||
})
|
|
||||||
return newObject as unknown as T
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Write mode for writing a table.
|
* Write mode for writing a table.
|
||||||
*/
|
*/
|
||||||
|
|||||||
130
node/src/query.ts
Normal file
130
node/src/query.ts
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
// Copyright 2023 LanceDB Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
import { Vector, tableFromIPC } from 'apache-arrow'
|
||||||
|
import { type EmbeddingFunction } from './embedding/embedding_function'
|
||||||
|
import { type MetricType } from '.'
|
||||||
|
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||||
|
const { tableSearch } = require('../native.js')
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A builder for nearest neighbor queries for LanceDB.
|
||||||
|
*/
|
||||||
|
export class Query<T = number[]> {
|
||||||
|
private readonly _query: T
|
||||||
|
private readonly _tbl?: any
|
||||||
|
private _queryVector?: number[]
|
||||||
|
private _limit: number
|
||||||
|
private _refineFactor?: number
|
||||||
|
private _nprobes: number
|
||||||
|
private _select?: string[]
|
||||||
|
private _filter?: string
|
||||||
|
private _metricType?: MetricType
|
||||||
|
protected readonly _embeddings?: EmbeddingFunction<T>
|
||||||
|
|
||||||
|
constructor (query: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
|
||||||
|
this._tbl = tbl
|
||||||
|
this._query = query
|
||||||
|
this._limit = 10
|
||||||
|
this._nprobes = 20
|
||||||
|
this._refineFactor = undefined
|
||||||
|
this._select = undefined
|
||||||
|
this._filter = undefined
|
||||||
|
this._metricType = undefined
|
||||||
|
this._embeddings = embeddings
|
||||||
|
}
|
||||||
|
|
||||||
|
/***
|
||||||
|
* Sets the number of results that will be returned
|
||||||
|
* @param value number of results
|
||||||
|
*/
|
||||||
|
limit (value: number): Query<T> {
|
||||||
|
this._limit = value
|
||||||
|
return this
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Refine the results by reading extra elements and re-ranking them in memory.
|
||||||
|
* @param value refine factor to use in this query.
|
||||||
|
*/
|
||||||
|
refineFactor (value: number): Query<T> {
|
||||||
|
this._refineFactor = value
|
||||||
|
return this
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of probes used. A higher number makes search more accurate but also slower.
|
||||||
|
* @param value The number of probes used.
|
||||||
|
*/
|
||||||
|
nprobes (value: number): Query<T> {
|
||||||
|
this._nprobes = value
|
||||||
|
return this
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A filter statement to be applied to this query.
|
||||||
|
* @param value A filter in the same format used by a sql WHERE clause.
|
||||||
|
*/
|
||||||
|
filter (value: string): Query<T> {
|
||||||
|
this._filter = value
|
||||||
|
return this
|
||||||
|
}
|
||||||
|
|
||||||
|
where = this.filter
|
||||||
|
|
||||||
|
/** Return only the specified columns.
|
||||||
|
*
|
||||||
|
* @param value Only select the specified columns. If not specified, all columns will be returned.
|
||||||
|
*/
|
||||||
|
select (value: string[]): Query<T> {
|
||||||
|
this._select = value
|
||||||
|
return this
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The MetricType used for this Query.
|
||||||
|
* @param value The metric type to use. @see MetricType for the different options
|
||||||
|
*/
|
||||||
|
metricType (value: MetricType): Query<T> {
|
||||||
|
this._metricType = value
|
||||||
|
return this
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute the query and return the results as an Array of Objects
|
||||||
|
*/
|
||||||
|
async execute<T = Record<string, unknown>> (): Promise<T[]> {
|
||||||
|
if (this._embeddings !== undefined) {
|
||||||
|
this._queryVector = (await this._embeddings.embed([this._query]))[0]
|
||||||
|
} else {
|
||||||
|
this._queryVector = this._query as number[]
|
||||||
|
}
|
||||||
|
|
||||||
|
const buffer = await tableSearch.call(this._tbl, this)
|
||||||
|
const data = tableFromIPC(buffer)
|
||||||
|
|
||||||
|
return data.toArray().map((entry: Record<string, unknown>) => {
|
||||||
|
const newObject: Record<string, unknown> = {}
|
||||||
|
Object.keys(entry).forEach((key: string) => {
|
||||||
|
if (entry[key] instanceof Vector) {
|
||||||
|
newObject[key] = (entry[key] as Vector).toArray()
|
||||||
|
} else {
|
||||||
|
newObject[key] = entry[key]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return newObject as unknown as T
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
105
node/src/remote/client.ts
Normal file
105
node/src/remote/client.ts
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
// Copyright 2023 LanceDB Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
import axios, { type AxiosResponse } from 'axios'
|
||||||
|
|
||||||
|
import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
|
||||||
|
|
||||||
|
export class HttpLancedbClient {
|
||||||
|
private readonly _url: string
|
||||||
|
|
||||||
|
public constructor (
|
||||||
|
url: string,
|
||||||
|
private readonly _apiKey: string,
|
||||||
|
private readonly _dbName?: string
|
||||||
|
) {
|
||||||
|
this._url = url
|
||||||
|
}
|
||||||
|
|
||||||
|
get uri (): string {
|
||||||
|
return this._url
|
||||||
|
}
|
||||||
|
|
||||||
|
public async search (
|
||||||
|
tableName: string,
|
||||||
|
vector: number[],
|
||||||
|
k: number,
|
||||||
|
nprobes: number,
|
||||||
|
refineFactor?: number,
|
||||||
|
columns?: string[],
|
||||||
|
filter?: string
|
||||||
|
): Promise<ArrowTable<any>> {
|
||||||
|
const response = await axios.post(
|
||||||
|
`${this._url}/v1/table/${tableName}`,
|
||||||
|
{
|
||||||
|
vector,
|
||||||
|
k,
|
||||||
|
nprobes,
|
||||||
|
refineFactor,
|
||||||
|
columns,
|
||||||
|
filter
|
||||||
|
},
|
||||||
|
{
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'x-api-key': this._apiKey,
|
||||||
|
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
|
||||||
|
},
|
||||||
|
responseType: 'arraybuffer',
|
||||||
|
timeout: 10000
|
||||||
|
}
|
||||||
|
).catch((err) => {
|
||||||
|
console.error('error: ', err)
|
||||||
|
return err.response
|
||||||
|
})
|
||||||
|
if (response.status !== 200) {
|
||||||
|
const errorData = new TextDecoder().decode(response.data)
|
||||||
|
throw new Error(
|
||||||
|
`Server Error, status: ${response.status as number}, ` +
|
||||||
|
`message: ${response.statusText as string}: ${errorData}`
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
const table = tableFromIPC(response.data)
|
||||||
|
return table
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Send a GET request.
|
||||||
|
*/
|
||||||
|
public async get (path: string, params?: Record<string, string | number>): Promise<AxiosResponse> {
|
||||||
|
const response = await axios.get(
|
||||||
|
`${this._url}${path}`,
|
||||||
|
{
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'x-api-key': this._apiKey
|
||||||
|
},
|
||||||
|
params,
|
||||||
|
timeout: 10000
|
||||||
|
}
|
||||||
|
).catch((err) => {
|
||||||
|
console.error('error: ', err)
|
||||||
|
return err.response
|
||||||
|
})
|
||||||
|
if (response.status !== 200) {
|
||||||
|
const errorData = new TextDecoder().decode(response.data)
|
||||||
|
throw new Error(
|
||||||
|
`Server Error, status: ${response.status as number}, ` +
|
||||||
|
`message: ${response.statusText as string}: ${errorData}`
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return response
|
||||||
|
}
|
||||||
|
}
|
||||||
168
node/src/remote/index.ts
Normal file
168
node/src/remote/index.ts
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
// Copyright 2023 LanceDB Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
import {
|
||||||
|
type EmbeddingFunction, type Table, type VectorIndexParams, type Connection,
|
||||||
|
type ConnectionOptions
|
||||||
|
} from '../index'
|
||||||
|
import { Query } from '../query'
|
||||||
|
|
||||||
|
import { type Table as ArrowTable, Vector } from 'apache-arrow'
|
||||||
|
import { HttpLancedbClient } from './client'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remote connection.
|
||||||
|
*/
|
||||||
|
export class RemoteConnection implements Connection {
|
||||||
|
private readonly _client: HttpLancedbClient
|
||||||
|
private readonly _dbName: string
|
||||||
|
|
||||||
|
constructor (opts: ConnectionOptions) {
|
||||||
|
if (!opts.uri.startsWith('db://')) {
|
||||||
|
throw new Error(`Invalid remote DB URI: ${opts.uri}`)
|
||||||
|
}
|
||||||
|
if (opts.apiKey === undefined || opts.region === undefined) {
|
||||||
|
throw new Error('API key and region are not supported for remote connections')
|
||||||
|
}
|
||||||
|
|
||||||
|
this._dbName = opts.uri.slice('db://'.length)
|
||||||
|
let server: string
|
||||||
|
if (opts.hostOverride === undefined) {
|
||||||
|
server = `https://${this._dbName}.${opts.region}.api.lancedb.com`
|
||||||
|
} else {
|
||||||
|
server = opts.hostOverride
|
||||||
|
}
|
||||||
|
this._client = new HttpLancedbClient(server, opts.apiKey, opts.hostOverride === undefined ? undefined : this._dbName)
|
||||||
|
}
|
||||||
|
|
||||||
|
get uri (): string {
|
||||||
|
// add the db:// prefix back
|
||||||
|
return 'db://' + this._client.uri
|
||||||
|
}
|
||||||
|
|
||||||
|
async tableNames (): Promise<string[]> {
|
||||||
|
const response = await this._client.get('/v1/table/')
|
||||||
|
return response.data.tables
|
||||||
|
}
|
||||||
|
|
||||||
|
async openTable (name: string): Promise<Table>
|
||||||
|
async openTable<T> (name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
|
||||||
|
async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
|
||||||
|
if (embeddings !== undefined) {
|
||||||
|
return new RemoteTable(this._client, name, embeddings)
|
||||||
|
} else {
|
||||||
|
return new RemoteTable(this._client, name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table>
|
||||||
|
async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
|
||||||
|
async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
|
||||||
|
throw new Error('Not implemented')
|
||||||
|
}
|
||||||
|
|
||||||
|
async createTableArrow (name: string, table: ArrowTable): Promise<Table> {
|
||||||
|
throw new Error('Not implemented')
|
||||||
|
}
|
||||||
|
|
||||||
|
async dropTable (name: string): Promise<void> {
|
||||||
|
throw new Error('Not implemented')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export class RemoteQuery<T = number[]> extends Query<T> {
|
||||||
|
constructor (query: T, private readonly _client: HttpLancedbClient,
|
||||||
|
private readonly _name: string, embeddings?: EmbeddingFunction<T>) {
|
||||||
|
super(query, undefined, embeddings)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: refactor this to a base class + queryImpl pattern
|
||||||
|
async execute<T = Record<string, unknown>>(): Promise<T[]> {
|
||||||
|
const embeddings = this._embeddings
|
||||||
|
const query = (this as any)._query
|
||||||
|
let queryVector: number[]
|
||||||
|
|
||||||
|
if (embeddings !== undefined) {
|
||||||
|
queryVector = (await embeddings.embed([query]))[0]
|
||||||
|
} else {
|
||||||
|
queryVector = query as number[]
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await this._client.search(
|
||||||
|
this._name,
|
||||||
|
queryVector,
|
||||||
|
(this as any)._limit,
|
||||||
|
(this as any)._nprobes,
|
||||||
|
(this as any)._refineFactor,
|
||||||
|
(this as any)._select,
|
||||||
|
(this as any)._filter
|
||||||
|
)
|
||||||
|
|
||||||
|
return data.toArray().map((entry: Record<string, unknown>) => {
|
||||||
|
const newObject: Record<string, unknown> = {}
|
||||||
|
Object.keys(entry).forEach((key: string) => {
|
||||||
|
if (entry[key] instanceof Vector) {
|
||||||
|
newObject[key] = (entry[key] as Vector).toArray()
|
||||||
|
} else {
|
||||||
|
newObject[key] = entry[key]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return newObject as unknown as T
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// we are using extend until we have next next version release
|
||||||
|
// Table and Connection have both been refactored to interfaces
|
||||||
|
export class RemoteTable<T = number[]> implements Table<T> {
|
||||||
|
private readonly _client: HttpLancedbClient
|
||||||
|
private readonly _embeddings?: EmbeddingFunction<T>
|
||||||
|
private readonly _name: string
|
||||||
|
|
||||||
|
constructor (client: HttpLancedbClient, name: string)
|
||||||
|
constructor (client: HttpLancedbClient, name: string, embeddings: EmbeddingFunction<T>)
|
||||||
|
constructor (client: HttpLancedbClient, name: string, embeddings?: EmbeddingFunction<T>) {
|
||||||
|
this._client = client
|
||||||
|
this._name = name
|
||||||
|
this._embeddings = embeddings
|
||||||
|
}
|
||||||
|
|
||||||
|
get name (): string {
|
||||||
|
return this._name
|
||||||
|
}
|
||||||
|
|
||||||
|
search (query: T): Query<T> {
|
||||||
|
return new RemoteQuery(query, this._client, this._name)//, this._embeddings_new)
|
||||||
|
}
|
||||||
|
|
||||||
|
async add (data: Array<Record<string, unknown>>): Promise<number> {
|
||||||
|
throw new Error('Not implemented')
|
||||||
|
}
|
||||||
|
|
||||||
|
async overwrite (data: Array<Record<string, unknown>>): Promise<number> {
|
||||||
|
throw new Error('Not implemented')
|
||||||
|
}
|
||||||
|
|
||||||
|
async createIndex (indexParams: VectorIndexParams): Promise<any> {
|
||||||
|
throw new Error('Not implemented')
|
||||||
|
}
|
||||||
|
|
||||||
|
async countRows (): Promise<number> {
|
||||||
|
throw new Error('Not implemented')
|
||||||
|
}
|
||||||
|
|
||||||
|
async delete (filter: string): Promise<void> {
|
||||||
|
throw new Error('Not implemented')
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -18,7 +18,8 @@ import * as chai from 'chai'
|
|||||||
import * as chaiAsPromised from 'chai-as-promised'
|
import * as chaiAsPromised from 'chai-as-promised'
|
||||||
|
|
||||||
import * as lancedb from '../index'
|
import * as lancedb from '../index'
|
||||||
import { type AwsCredentials, type EmbeddingFunction, MetricType, Query, WriteMode } from '../index'
|
import { type AwsCredentials, type EmbeddingFunction, MetricType, WriteMode } from '../index'
|
||||||
|
import { Query } from '../query'
|
||||||
|
|
||||||
const expect = chai.expect
|
const expect = chai.expect
|
||||||
const assert = chai.assert
|
const assert = chai.assert
|
||||||
@@ -268,7 +269,7 @@ describe('LanceDB client', function () {
|
|||||||
|
|
||||||
describe('Query object', function () {
|
describe('Query object', function () {
|
||||||
it('sets custom parameters', async function () {
|
it('sets custom parameters', async function () {
|
||||||
const query = new Query(undefined, [0.1, 0.3])
|
const query = new Query([0.1, 0.3])
|
||||||
.limit(1)
|
.limit(1)
|
||||||
.metricType(MetricType.Cosine)
|
.metricType(MetricType.Cosine)
|
||||||
.refineFactor(100)
|
.refineFactor(100)
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[bumpversion]
|
[bumpversion]
|
||||||
current_version = 0.1.8
|
current_version = 0.1.13
|
||||||
commit = True
|
commit = True
|
||||||
message = [python] Bump version: {current_version} → {new_version}
|
message = [python] Bump version: {current_version} → {new_version}
|
||||||
tag = True
|
tag = True
|
||||||
|
|||||||
@@ -13,17 +13,18 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import functools
|
|
||||||
import os
|
import os
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
from pyarrow import fs
|
from pyarrow import fs
|
||||||
|
|
||||||
from .common import DATA, URI
|
from .common import DATA, URI
|
||||||
from .table import LanceTable, Table
|
from .table import LanceTable, Table
|
||||||
from .util import get_uri_location, get_uri_scheme
|
from .util import fs_from_uri, get_uri_location, get_uri_scheme
|
||||||
|
|
||||||
|
|
||||||
class DBConnection(ABC):
|
class DBConnection(ABC):
|
||||||
@@ -38,8 +39,10 @@ class DBConnection(ABC):
|
|||||||
def create_table(
|
def create_table(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
data: DATA = None,
|
data: Optional[
|
||||||
schema: pa.Schema = None,
|
Union[List[dict], dict, pd.DataFrame, pa.Table, Iterable[pa.RecordBatch]],
|
||||||
|
] = None,
|
||||||
|
schema: Optional[pa.Schema] = None,
|
||||||
mode: str = "create",
|
mode: str = "create",
|
||||||
on_bad_vectors: str = "error",
|
on_bad_vectors: str = "error",
|
||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
@@ -51,7 +54,7 @@ class DBConnection(ABC):
|
|||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
data: list, tuple, dict, pd.DataFrame; optional
|
data: list, tuple, dict, pd.DataFrame; optional
|
||||||
The data to insert into the table.
|
The data to initialize the table. User must provide at least one of `data` or `schema`.
|
||||||
schema: pyarrow.Schema; optional
|
schema: pyarrow.Schema; optional
|
||||||
The schema of the table.
|
The schema of the table.
|
||||||
mode: str; default "create"
|
mode: str; default "create"
|
||||||
@@ -64,16 +67,16 @@ class DBConnection(ABC):
|
|||||||
fill_value: float
|
fill_value: float
|
||||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||||
|
|
||||||
Note
|
|
||||||
----
|
|
||||||
The vector index won't be created by default.
|
|
||||||
To create the index, call the `create_index` method on the table.
|
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
LanceTable
|
LanceTable
|
||||||
A reference to the newly created table.
|
A reference to the newly created table.
|
||||||
|
|
||||||
|
!!! note
|
||||||
|
|
||||||
|
The vector index won't be created by default.
|
||||||
|
To create the index, call the `create_index` method on the table.
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
|
|
||||||
@@ -119,7 +122,7 @@ class DBConnection(ABC):
|
|||||||
|
|
||||||
Data is converted to Arrow before being written to disk. For maximum
|
Data is converted to Arrow before being written to disk. For maximum
|
||||||
control over how data is saved, either provide the PyArrow schema to
|
control over how data is saved, either provide the PyArrow schema to
|
||||||
convert to or else provide a PyArrow table directly.
|
convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
|
||||||
|
|
||||||
>>> custom_schema = pa.schema([
|
>>> custom_schema = pa.schema([
|
||||||
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||||
@@ -138,6 +141,30 @@ class DBConnection(ABC):
|
|||||||
vector: [[[1.1,1.2],[0.2,1.8]]]
|
vector: [[[1.1,1.2],[0.2,1.8]]]
|
||||||
lat: [[45.5,40.1]]
|
lat: [[45.5,40.1]]
|
||||||
long: [[-122.7,-74.1]]
|
long: [[-122.7,-74.1]]
|
||||||
|
|
||||||
|
|
||||||
|
It is also possible to create a table from `[Iterable[pa.RecordBatch]]`:
|
||||||
|
|
||||||
|
|
||||||
|
>>> import pyarrow as pa
|
||||||
|
>>> def make_batches():
|
||||||
|
... for i in range(5):
|
||||||
|
... yield pa.RecordBatch.from_arrays(
|
||||||
|
... [
|
||||||
|
... pa.array([[3.1, 4.1], [5.9, 26.5]]),
|
||||||
|
... pa.array(["foo", "bar"]),
|
||||||
|
... pa.array([10.0, 20.0]),
|
||||||
|
... ],
|
||||||
|
... ["vector", "item", "price"],
|
||||||
|
... )
|
||||||
|
>>> schema=pa.schema([
|
||||||
|
... pa.field("vector", pa.list_(pa.float32())),
|
||||||
|
... pa.field("item", pa.utf8()),
|
||||||
|
... pa.field("price", pa.float32()),
|
||||||
|
... ])
|
||||||
|
>>> db.create_table("table4", make_batches(), schema=schema)
|
||||||
|
LanceTable(table4)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@@ -225,7 +252,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
A list of table names.
|
A list of table names.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
filesystem, path = fs.FileSystem.from_uri(self.uri)
|
filesystem, path = fs_from_uri(self.uri)
|
||||||
except pa.ArrowInvalid:
|
except pa.ArrowInvalid:
|
||||||
raise NotImplementedError("Unsupported scheme: " + self.uri)
|
raise NotImplementedError("Unsupported scheme: " + self.uri)
|
||||||
|
|
||||||
@@ -252,7 +279,7 @@ class LanceDBConnection(DBConnection):
|
|||||||
def create_table(
|
def create_table(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
data: DATA = None,
|
data: Optional[Union[List[dict], dict, pd.DataFrame]] = None,
|
||||||
schema: pa.Schema = None,
|
schema: pa.Schema = None,
|
||||||
mode: str = "create",
|
mode: str = "create",
|
||||||
on_bad_vectors: str = "error",
|
on_bad_vectors: str = "error",
|
||||||
@@ -260,114 +287,22 @@ class LanceDBConnection(DBConnection):
|
|||||||
) -> LanceTable:
|
) -> LanceTable:
|
||||||
"""Create a table in the database.
|
"""Create a table in the database.
|
||||||
|
|
||||||
Parameters
|
See
|
||||||
----------
|
---
|
||||||
name: str
|
DBConnection.create_table
|
||||||
The name of the table.
|
|
||||||
data: list, tuple, dict, pd.DataFrame; optional
|
|
||||||
The data to insert into the table.
|
|
||||||
schema: pyarrow.Schema; optional
|
|
||||||
The schema of the table.
|
|
||||||
mode: str; default "create"
|
|
||||||
The mode to use when creating the table. Can be either "create" or "overwrite".
|
|
||||||
By default, if the table already exists, an exception is raised.
|
|
||||||
If you want to overwrite the table, use mode="overwrite".
|
|
||||||
on_bad_vectors: str, default "error"
|
|
||||||
What to do if any of the vectors are not the same size or contains NaNs.
|
|
||||||
One of "error", "drop", "fill".
|
|
||||||
fill_value: float
|
|
||||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
|
||||||
|
|
||||||
Note
|
|
||||||
----
|
|
||||||
The vector index won't be created by default.
|
|
||||||
To create the index, call the `create_index` method on the table.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
LanceTable
|
|
||||||
A reference to the newly created table.
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
|
|
||||||
Can create with list of tuples or dictionaries:
|
|
||||||
|
|
||||||
>>> import lancedb
|
|
||||||
>>> db = lancedb.connect("./.lancedb")
|
|
||||||
>>> data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
|
|
||||||
... {"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1}]
|
|
||||||
>>> db.create_table("my_table", data)
|
|
||||||
LanceTable(my_table)
|
|
||||||
>>> db["my_table"].head()
|
|
||||||
pyarrow.Table
|
|
||||||
vector: fixed_size_list<item: float>[2]
|
|
||||||
child 0, item: float
|
|
||||||
lat: double
|
|
||||||
long: double
|
|
||||||
----
|
|
||||||
vector: [[[1.1,1.2],[0.2,1.8]]]
|
|
||||||
lat: [[45.5,40.1]]
|
|
||||||
long: [[-122.7,-74.1]]
|
|
||||||
|
|
||||||
You can also pass a pandas DataFrame:
|
|
||||||
|
|
||||||
>>> import pandas as pd
|
|
||||||
>>> data = pd.DataFrame({
|
|
||||||
... "vector": [[1.1, 1.2], [0.2, 1.8]],
|
|
||||||
... "lat": [45.5, 40.1],
|
|
||||||
... "long": [-122.7, -74.1]
|
|
||||||
... })
|
|
||||||
>>> db.create_table("table2", data)
|
|
||||||
LanceTable(table2)
|
|
||||||
>>> db["table2"].head()
|
|
||||||
pyarrow.Table
|
|
||||||
vector: fixed_size_list<item: float>[2]
|
|
||||||
child 0, item: float
|
|
||||||
lat: double
|
|
||||||
long: double
|
|
||||||
----
|
|
||||||
vector: [[[1.1,1.2],[0.2,1.8]]]
|
|
||||||
lat: [[45.5,40.1]]
|
|
||||||
long: [[-122.7,-74.1]]
|
|
||||||
|
|
||||||
Data is converted to Arrow before being written to disk. For maximum
|
|
||||||
control over how data is saved, either provide the PyArrow schema to
|
|
||||||
convert to or else provide a PyArrow table directly.
|
|
||||||
|
|
||||||
>>> custom_schema = pa.schema([
|
|
||||||
... pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
... pa.field("lat", pa.float32()),
|
|
||||||
... pa.field("long", pa.float32())
|
|
||||||
... ])
|
|
||||||
>>> db.create_table("table3", data, schema = custom_schema)
|
|
||||||
LanceTable(table3)
|
|
||||||
>>> db["table3"].head()
|
|
||||||
pyarrow.Table
|
|
||||||
vector: fixed_size_list<item: float>[2]
|
|
||||||
child 0, item: float
|
|
||||||
lat: float
|
|
||||||
long: float
|
|
||||||
----
|
|
||||||
vector: [[[1.1,1.2],[0.2,1.8]]]
|
|
||||||
lat: [[45.5,40.1]]
|
|
||||||
long: [[-122.7,-74.1]]
|
|
||||||
"""
|
"""
|
||||||
if mode.lower() not in ["create", "overwrite"]:
|
if mode.lower() not in ["create", "overwrite"]:
|
||||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||||
|
|
||||||
if data is not None:
|
tbl = LanceTable.create(
|
||||||
tbl = LanceTable.create(
|
self,
|
||||||
self,
|
name,
|
||||||
name,
|
data,
|
||||||
data,
|
schema,
|
||||||
schema,
|
mode=mode,
|
||||||
mode=mode,
|
on_bad_vectors=on_bad_vectors,
|
||||||
on_bad_vectors=on_bad_vectors,
|
fill_value=fill_value,
|
||||||
fill_value=fill_value,
|
)
|
||||||
)
|
|
||||||
else:
|
|
||||||
tbl = LanceTable.open(self, name)
|
|
||||||
return tbl
|
return tbl
|
||||||
|
|
||||||
def open_table(self, name: str) -> LanceTable:
|
def open_table(self, name: str) -> LanceTable:
|
||||||
|
|||||||
@@ -11,27 +11,37 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
"""Pydantic adapter for LanceDB"""
|
"""Pydantic (v1 / v2) adapter for LanceDB"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import inspect
|
import inspect
|
||||||
import sys
|
import sys
|
||||||
import types
|
import types
|
||||||
from abc import ABC, abstractstaticmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any, List, Type, Union, _GenericAlias
|
from typing import Any, Callable, Dict, Generator, List, Type, Union, _GenericAlias
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pydantic
|
import pydantic
|
||||||
from pydantic_core import CoreSchema, core_schema
|
import semver
|
||||||
|
|
||||||
|
PYDANTIC_VERSION = semver.Version.parse(pydantic.__version__)
|
||||||
|
try:
|
||||||
|
from pydantic_core import CoreSchema, core_schema
|
||||||
|
except ImportError:
|
||||||
|
if PYDANTIC_VERSION >= (2,):
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
class FixedSizeListMixin(ABC):
|
class FixedSizeListMixin(ABC):
|
||||||
@abstractstaticmethod
|
@staticmethod
|
||||||
|
@abstractmethod
|
||||||
def dim() -> int:
|
def dim() -> int:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractstaticmethod
|
@staticmethod
|
||||||
|
@abstractmethod
|
||||||
def value_arrow_type() -> pa.DataType:
|
def value_arrow_type() -> pa.DataType:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@@ -41,9 +51,15 @@ def vector(
|
|||||||
) -> Type[FixedSizeListMixin]:
|
) -> Type[FixedSizeListMixin]:
|
||||||
"""Pydantic Vector Type.
|
"""Pydantic Vector Type.
|
||||||
|
|
||||||
Note
|
!!! warning
|
||||||
----
|
Experimental feature.
|
||||||
Experimental feature.
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
dim : int
|
||||||
|
The dimension of the vector.
|
||||||
|
value_type : pyarrow.DataType, optional
|
||||||
|
The value type of the vector, by default pa.float32()
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -52,13 +68,22 @@ def vector(
|
|||||||
>>> from lancedb.pydantic import vector
|
>>> from lancedb.pydantic import vector
|
||||||
...
|
...
|
||||||
>>> class MyModel(pydantic.BaseModel):
|
>>> class MyModel(pydantic.BaseModel):
|
||||||
... vector: vector(756)
|
|
||||||
... id: int
|
... id: int
|
||||||
... description: str
|
... url: str
|
||||||
|
... embeddings: vector(768)
|
||||||
|
>>> schema = pydantic_to_schema(MyModel)
|
||||||
|
>>> assert schema == pa.schema([
|
||||||
|
... pa.field("id", pa.int64(), False),
|
||||||
|
... pa.field("url", pa.utf8(), False),
|
||||||
|
... pa.field("embeddings", pa.list_(pa.float32(), 768), False)
|
||||||
|
... ])
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# TODO: make a public parameterized type.
|
# TODO: make a public parameterized type.
|
||||||
class FixedSizeList(list, FixedSizeListMixin):
|
class FixedSizeList(list, FixedSizeListMixin):
|
||||||
|
def __repr__(self):
|
||||||
|
return f"FixedSizeList(dim={dim})"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def dim() -> int:
|
def dim() -> int:
|
||||||
return dim
|
return dim
|
||||||
@@ -80,6 +105,25 @@ def vector(
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def __get_validators__(cls) -> Generator[Callable, None, None]:
|
||||||
|
yield cls.validate
|
||||||
|
|
||||||
|
# For pydantic v1
|
||||||
|
@classmethod
|
||||||
|
def validate(cls, v):
|
||||||
|
if not isinstance(v, (list, range, np.ndarray)) or len(v) != dim:
|
||||||
|
raise TypeError("A list of numbers or numpy.ndarray is needed")
|
||||||
|
return v
|
||||||
|
|
||||||
|
if PYDANTIC_VERSION < (2, 0):
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def __modify_schema__(cls, field_schema: Dict[str, Any]):
|
||||||
|
field_schema["items"] = {"type": "number"}
|
||||||
|
field_schema["maxItems"] = dim
|
||||||
|
field_schema["minItems"] = dim
|
||||||
|
|
||||||
return FixedSizeList
|
return FixedSizeList
|
||||||
|
|
||||||
|
|
||||||
@@ -106,11 +150,20 @@ def _py_type_to_arrow_type(py_type: Type[Any]) -> pa.DataType:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
|
if PYDANTIC_VERSION.major < 2:
|
||||||
fields = []
|
|
||||||
for name, field in model.model_fields.items():
|
def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
|
||||||
fields.append(_pydantic_to_field(name, field))
|
return [
|
||||||
return fields
|
_pydantic_to_field(name, field) for name, field in model.__fields__.items()
|
||||||
|
]
|
||||||
|
|
||||||
|
else:
|
||||||
|
|
||||||
|
def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
|
||||||
|
return [
|
||||||
|
_pydantic_to_field(name, field)
|
||||||
|
for name, field in model.model_fields.items()
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def _pydantic_to_arrow_type(field: pydantic.fields.FieldInfo) -> pa.DataType:
|
def _pydantic_to_arrow_type(field: pydantic.fields.FieldInfo) -> pa.DataType:
|
||||||
@@ -163,7 +216,36 @@ def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
|
|||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
A PyArrow Schema.
|
pyarrow.Schema
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
|
||||||
|
>>> from typing import List, Optional
|
||||||
|
>>> import pydantic
|
||||||
|
>>> from lancedb.pydantic import pydantic_to_schema
|
||||||
|
...
|
||||||
|
>>> class InnerModel(pydantic.BaseModel):
|
||||||
|
... a: str
|
||||||
|
... b: Optional[float]
|
||||||
|
>>>
|
||||||
|
>>> class FooModel(pydantic.BaseModel):
|
||||||
|
... id: int
|
||||||
|
... s: Optional[str] = None
|
||||||
|
... vec: List[float]
|
||||||
|
... li: List[int]
|
||||||
|
... inner: InnerModel
|
||||||
|
>>> schema = pydantic_to_schema(FooModel)
|
||||||
|
>>> assert schema == pa.schema([
|
||||||
|
... pa.field("id", pa.int64(), False),
|
||||||
|
... pa.field("s", pa.utf8(), True),
|
||||||
|
... pa.field("vec", pa.list_(pa.float64()), False),
|
||||||
|
... pa.field("li", pa.list_(pa.int64()), False),
|
||||||
|
... pa.field("inner", pa.struct([
|
||||||
|
... pa.field("a", pa.utf8(), False),
|
||||||
|
... pa.field("b", pa.float64(), True),
|
||||||
|
... ]), False),
|
||||||
|
... ])
|
||||||
"""
|
"""
|
||||||
fields = _pydantic_model_to_fields(model)
|
fields = _pydantic_model_to_fields(model)
|
||||||
return pa.schema(fields)
|
return pa.schema(fields)
|
||||||
|
|||||||
@@ -226,6 +226,7 @@ class LanceQueryBuilder:
|
|||||||
columns=self._columns,
|
columns=self._columns,
|
||||||
nprobes=self._nprobes,
|
nprobes=self._nprobes,
|
||||||
refine_factor=self._refine_factor,
|
refine_factor=self._refine_factor,
|
||||||
|
vector_column=self._vector_column,
|
||||||
)
|
)
|
||||||
return self._table._execute_query(query)
|
return self._table._execute_query(query)
|
||||||
|
|
||||||
|
|||||||
22
python/lancedb/remote/arrow.py
Normal file
22
python/lancedb/remote/arrow.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Copyright 2023 LanceDB Developers
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
|
||||||
|
def to_ipc_binary(table: pa.Table) -> bytes:
|
||||||
|
"""Serialize a PyArrow Table to IPC binary."""
|
||||||
|
sink = pa.BufferOutputStream()
|
||||||
|
with pa.ipc.new_stream(sink, table.schema) as writer:
|
||||||
|
writer.write_table(table)
|
||||||
|
return sink.getvalue().to_pybytes()
|
||||||
@@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
|
|
||||||
import functools
|
import functools
|
||||||
from typing import Any, Callable, Dict, Union
|
from typing import Any, Callable, Dict, Optional, Union
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import attr
|
import attr
|
||||||
@@ -24,6 +24,8 @@ from lancedb.common import Credential
|
|||||||
from lancedb.remote import VectorQuery, VectorQueryResult
|
from lancedb.remote import VectorQuery, VectorQueryResult
|
||||||
from lancedb.remote.errors import LanceDBClientError
|
from lancedb.remote.errors import LanceDBClientError
|
||||||
|
|
||||||
|
ARROW_STREAM_CONTENT_TYPE = "application/vnd.apache.arrow.stream"
|
||||||
|
|
||||||
|
|
||||||
def _check_not_closed(f):
|
def _check_not_closed(f):
|
||||||
@functools.wraps(f)
|
@functools.wraps(f)
|
||||||
@@ -59,9 +61,12 @@ class RestfulLanceDBClient:
|
|||||||
|
|
||||||
@functools.cached_property
|
@functools.cached_property
|
||||||
def headers(self) -> Dict[str, str]:
|
def headers(self) -> Dict[str, str]:
|
||||||
return {
|
headers = {
|
||||||
"x-api-key": self.api_key,
|
"x-api-key": self.api_key,
|
||||||
}
|
}
|
||||||
|
if self.region == "local": # Local test mode
|
||||||
|
headers["Host"] = f"{self.db_name}.{self.region}.api.lancedb.com"
|
||||||
|
return headers
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def _check_status(resp: aiohttp.ClientResponse):
|
async def _check_status(resp: aiohttp.ClientResponse):
|
||||||
@@ -93,7 +98,9 @@ class RestfulLanceDBClient:
|
|||||||
async def post(
|
async def post(
|
||||||
self,
|
self,
|
||||||
uri: str,
|
uri: str,
|
||||||
data: Union[Dict[str, Any], BaseModel],
|
data: Union[Dict[str, Any], BaseModel, bytes],
|
||||||
|
params: Optional[Dict[str, Any]] = None,
|
||||||
|
content_type: Optional[str] = None,
|
||||||
deserialize: Callable = lambda resp: resp.json(),
|
deserialize: Callable = lambda resp: resp.json(),
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Send a POST request and returns the deserialized response payload.
|
"""Send a POST request and returns the deserialized response payload.
|
||||||
@@ -107,10 +114,19 @@ class RestfulLanceDBClient:
|
|||||||
"""
|
"""
|
||||||
if isinstance(data, BaseModel):
|
if isinstance(data, BaseModel):
|
||||||
data: Dict[str, Any] = data.dict(exclude_none=True)
|
data: Dict[str, Any] = data.dict(exclude_none=True)
|
||||||
|
if isinstance(data, bytes):
|
||||||
|
req_kwargs = {"data": data}
|
||||||
|
else:
|
||||||
|
req_kwargs = {"json": data}
|
||||||
|
|
||||||
|
headers = self.headers.copy()
|
||||||
|
if content_type is not None:
|
||||||
|
headers["content-type"] = content_type
|
||||||
async with self.session.post(
|
async with self.session.post(
|
||||||
uri,
|
uri,
|
||||||
json=data,
|
headers=headers,
|
||||||
headers=self.headers,
|
params=params,
|
||||||
|
**req_kwargs,
|
||||||
) as resp:
|
) as resp:
|
||||||
resp: aiohttp.ClientResponse = resp
|
resp: aiohttp.ClientResponse = resp
|
||||||
await self._check_status(resp)
|
await self._check_status(resp)
|
||||||
@@ -119,11 +135,11 @@ class RestfulLanceDBClient:
|
|||||||
@_check_not_closed
|
@_check_not_closed
|
||||||
async def list_tables(self):
|
async def list_tables(self):
|
||||||
"""List all tables in the database."""
|
"""List all tables in the database."""
|
||||||
json = await self.get("/1/table/", {})
|
json = await self.get("/v1/table/", {})
|
||||||
return json["tables"]
|
return json["tables"]
|
||||||
|
|
||||||
@_check_not_closed
|
@_check_not_closed
|
||||||
async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
|
async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
|
||||||
"""Query a table."""
|
"""Query a table."""
|
||||||
tbl = await self.post(f"/1/table/{table_name}/", query, deserialize=_read_ipc)
|
tbl = await self.post(f"/v1/table/{table_name}/", query, deserialize=_read_ipc)
|
||||||
return VectorQueryResult(tbl)
|
return VectorQueryResult(tbl)
|
||||||
|
|||||||
@@ -12,6 +12,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import uuid
|
||||||
from typing import List
|
from typing import List
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
@@ -19,9 +20,11 @@ import pyarrow as pa
|
|||||||
|
|
||||||
from lancedb.common import DATA
|
from lancedb.common import DATA
|
||||||
from lancedb.db import DBConnection
|
from lancedb.db import DBConnection
|
||||||
from lancedb.table import Table
|
from lancedb.schema import schema_to_json
|
||||||
|
from lancedb.table import Table, _sanitize_data
|
||||||
|
|
||||||
from .client import RestfulLanceDBClient
|
from .arrow import to_ipc_binary
|
||||||
|
from .client import ARROW_STREAM_CONTENT_TYPE, RestfulLanceDBClient
|
||||||
|
|
||||||
|
|
||||||
class RemoteDBConnection(DBConnection):
|
class RemoteDBConnection(DBConnection):
|
||||||
@@ -71,8 +74,31 @@ class RemoteDBConnection(DBConnection):
|
|||||||
name: str,
|
name: str,
|
||||||
data: DATA = None,
|
data: DATA = None,
|
||||||
schema: pa.Schema = None,
|
schema: pa.Schema = None,
|
||||||
mode: str = "create",
|
|
||||||
on_bad_vectors: str = "error",
|
on_bad_vectors: str = "error",
|
||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
) -> Table:
|
) -> Table:
|
||||||
raise NotImplementedError
|
if data is None and schema is None:
|
||||||
|
raise ValueError("Either data or schema must be provided.")
|
||||||
|
if data is not None:
|
||||||
|
data = _sanitize_data(
|
||||||
|
data, schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
if schema is None:
|
||||||
|
raise ValueError("Either data or schema must be provided")
|
||||||
|
data = pa.Table.from_pylist([], schema=schema)
|
||||||
|
|
||||||
|
from .table import RemoteTable
|
||||||
|
|
||||||
|
data = to_ipc_binary(data)
|
||||||
|
request_id = uuid.uuid4().hex
|
||||||
|
|
||||||
|
self._loop.run_until_complete(
|
||||||
|
self._client.post(
|
||||||
|
f"/v1/table/{name}/create",
|
||||||
|
data=data,
|
||||||
|
params={"request_id": request_id},
|
||||||
|
content_type=ARROW_STREAM_CONTENT_TYPE,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return RemoteTable(self, name)
|
||||||
|
|||||||
@@ -11,6 +11,8 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from functools import cached_property
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
@@ -18,7 +20,10 @@ import pyarrow as pa
|
|||||||
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
|
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||||
|
|
||||||
from ..query import LanceQueryBuilder, Query
|
from ..query import LanceQueryBuilder, Query
|
||||||
from ..table import Query, Table
|
from ..schema import json_to_schema
|
||||||
|
from ..table import Query, Table, _sanitize_data
|
||||||
|
from .arrow import to_ipc_binary
|
||||||
|
from .client import ARROW_STREAM_CONTENT_TYPE
|
||||||
from .db import RemoteDBConnection
|
from .db import RemoteDBConnection
|
||||||
|
|
||||||
|
|
||||||
@@ -30,8 +35,14 @@ class RemoteTable(Table):
|
|||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"RemoteTable({self._conn.db_name}.{self.name})"
|
return f"RemoteTable({self._conn.db_name}.{self.name})"
|
||||||
|
|
||||||
|
@cached_property
|
||||||
def schema(self) -> pa.Schema:
|
def schema(self) -> pa.Schema:
|
||||||
raise NotImplementedError
|
"""Return the schema of the table."""
|
||||||
|
resp = self._conn._loop.run_until_complete(
|
||||||
|
self._conn._client.get(f"/v1/table/{self._name}/describe")
|
||||||
|
)
|
||||||
|
schema = json_to_schema(resp["schema"])
|
||||||
|
return schema
|
||||||
|
|
||||||
def to_arrow(self) -> pa.Table:
|
def to_arrow(self) -> pa.Table:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
@@ -53,7 +64,21 @@ class RemoteTable(Table):
|
|||||||
on_bad_vectors: str = "error",
|
on_bad_vectors: str = "error",
|
||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
) -> int:
|
) -> int:
|
||||||
raise NotImplementedError
|
data = _sanitize_data(
|
||||||
|
data, self.schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||||
|
)
|
||||||
|
payload = to_ipc_binary(data)
|
||||||
|
|
||||||
|
request_id = uuid.uuid4().hex
|
||||||
|
|
||||||
|
self._conn._loop.run_until_complete(
|
||||||
|
self._conn._client.post(
|
||||||
|
f"/v1/table/{self._name}/insert",
|
||||||
|
data=payload,
|
||||||
|
params={"request_id": request_id, "mode": mode},
|
||||||
|
content_type=ARROW_STREAM_CONTENT_TYPE,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
def search(
|
def search(
|
||||||
self, query: Union[VEC, str], vector_column: str = VECTOR_COLUMN_NAME
|
self, query: Union[VEC, str], vector_column: str = VECTOR_COLUMN_NAME
|
||||||
@@ -63,3 +88,6 @@ class RemoteTable(Table):
|
|||||||
def _execute_query(self, query: Query) -> pa.Table:
|
def _execute_query(self, query: Query) -> pa.Table:
|
||||||
result = self._conn._client.query(self._name, query)
|
result = self._conn._client.query(self._name, query)
|
||||||
return self._conn._loop.run_until_complete(result).to_arrow()
|
return self._conn._loop.run_until_complete(result).to_arrow()
|
||||||
|
|
||||||
|
def delete(self, predicate: str):
|
||||||
|
raise NotImplementedError
|
||||||
|
|||||||
@@ -13,10 +13,10 @@
|
|||||||
|
|
||||||
"""Schema related utilities."""
|
"""Schema related utilities."""
|
||||||
|
|
||||||
import json
|
|
||||||
from typing import Any, Dict, Type
|
from typing import Any, Dict, Type
|
||||||
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
|
from lance import json_to_schema, schema_to_json
|
||||||
|
|
||||||
|
|
||||||
def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType:
|
def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType:
|
||||||
@@ -43,247 +43,3 @@ def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataTyp
|
|||||||
... ])
|
... ])
|
||||||
"""
|
"""
|
||||||
return pa.list_(value_type, dimension)
|
return pa.list_(value_type, dimension)
|
||||||
|
|
||||||
|
|
||||||
def _type_to_dict(dt: pa.DataType) -> Dict[str, Any]:
|
|
||||||
if pa.types.is_boolean(dt):
|
|
||||||
return {"type": "boolean"}
|
|
||||||
elif pa.types.is_int8(dt):
|
|
||||||
return {"type": "int8"}
|
|
||||||
elif pa.types.is_int16(dt):
|
|
||||||
return {"type": "int16"}
|
|
||||||
elif pa.types.is_int32(dt):
|
|
||||||
return {"type": "int32"}
|
|
||||||
elif pa.types.is_int64(dt):
|
|
||||||
return {"type": "int64"}
|
|
||||||
elif pa.types.is_uint8(dt):
|
|
||||||
return {"type": "uint8"}
|
|
||||||
elif pa.types.is_uint16(dt):
|
|
||||||
return {"type": "uint16"}
|
|
||||||
elif pa.types.is_uint32(dt):
|
|
||||||
return {"type": "uint32"}
|
|
||||||
elif pa.types.is_uint64(dt):
|
|
||||||
return {"type": "uint64"}
|
|
||||||
elif pa.types.is_float16(dt):
|
|
||||||
return {"type": "float16"}
|
|
||||||
elif pa.types.is_float32(dt):
|
|
||||||
return {"type": "float32"}
|
|
||||||
elif pa.types.is_float64(dt):
|
|
||||||
return {"type": "float64"}
|
|
||||||
elif pa.types.is_date32(dt):
|
|
||||||
return {"type": f"date32"}
|
|
||||||
elif pa.types.is_date64(dt):
|
|
||||||
return {"type": f"date64"}
|
|
||||||
elif pa.types.is_time32(dt):
|
|
||||||
return {"type": f"time32:{dt.unit}"}
|
|
||||||
elif pa.types.is_time64(dt):
|
|
||||||
return {"type": f"time64:{dt.unit}"}
|
|
||||||
elif pa.types.is_timestamp(dt):
|
|
||||||
return {"type": f"timestamp:{dt.unit}:{dt.tz if dt.tz is not None else ''}"}
|
|
||||||
elif pa.types.is_string(dt):
|
|
||||||
return {"type": "string"}
|
|
||||||
elif pa.types.is_binary(dt):
|
|
||||||
return {"type": "binary"}
|
|
||||||
elif pa.types.is_large_string(dt):
|
|
||||||
return {"type": "large_string"}
|
|
||||||
elif pa.types.is_large_binary(dt):
|
|
||||||
return {"type": "large_binary"}
|
|
||||||
elif pa.types.is_fixed_size_binary(dt):
|
|
||||||
return {"type": "fixed_size_binary", "width": dt.byte_width}
|
|
||||||
elif pa.types.is_fixed_size_list(dt):
|
|
||||||
return {
|
|
||||||
"type": "fixed_size_list",
|
|
||||||
"width": dt.list_size,
|
|
||||||
"value_type": _type_to_dict(dt.value_type),
|
|
||||||
}
|
|
||||||
elif pa.types.is_list(dt):
|
|
||||||
return {
|
|
||||||
"type": "list",
|
|
||||||
"value_type": _type_to_dict(dt.value_type),
|
|
||||||
}
|
|
||||||
elif pa.types.is_struct(dt):
|
|
||||||
return {
|
|
||||||
"type": "struct",
|
|
||||||
"fields": [_field_to_dict(dt.field(i)) for i in range(dt.num_fields)],
|
|
||||||
}
|
|
||||||
elif pa.types.is_dictionary(dt):
|
|
||||||
return {
|
|
||||||
"type": "dictionary",
|
|
||||||
"index_type": _type_to_dict(dt.index_type),
|
|
||||||
"value_type": _type_to_dict(dt.value_type),
|
|
||||||
}
|
|
||||||
# TODO: support extension types
|
|
||||||
|
|
||||||
raise TypeError(f"Unsupported type: {dt}")
|
|
||||||
|
|
||||||
|
|
||||||
def _field_to_dict(field: pa.field) -> Dict[str, Any]:
|
|
||||||
ret = {
|
|
||||||
"name": field.name,
|
|
||||||
"type": _type_to_dict(field.type),
|
|
||||||
"nullable": field.nullable,
|
|
||||||
}
|
|
||||||
if field.metadata is not None:
|
|
||||||
ret["metadata"] = field.metadata
|
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
def schema_to_dict(schema: pa.Schema) -> Dict[str, Any]:
|
|
||||||
"""Convert a PyArrow [Schema](pyarrow.Schema) to a dictionary.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
schema : pa.Schema
|
|
||||||
The PyArrow Schema to convert
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
A dict of the data type.
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
|
|
||||||
>>> import pyarrow as pa
|
|
||||||
>>> import lancedb
|
|
||||||
>>> schema = pa.schema(
|
|
||||||
... [
|
|
||||||
... pa.field("id", pa.int64()),
|
|
||||||
... pa.field("vector", lancedb.vector(512), nullable=False),
|
|
||||||
... pa.field(
|
|
||||||
... "struct",
|
|
||||||
... pa.struct(
|
|
||||||
... [
|
|
||||||
... pa.field("a", pa.utf8()),
|
|
||||||
... pa.field("b", pa.float32()),
|
|
||||||
... ]
|
|
||||||
... ),
|
|
||||||
... True,
|
|
||||||
... ),
|
|
||||||
... ],
|
|
||||||
... metadata={"key": "value"},
|
|
||||||
... )
|
|
||||||
>>> json_schema = schema_to_dict(schema)
|
|
||||||
>>> assert json_schema == {
|
|
||||||
... "fields": [
|
|
||||||
... {"name": "id", "type": {"type": "int64"}, "nullable": True},
|
|
||||||
... {
|
|
||||||
... "name": "vector",
|
|
||||||
... "type": {
|
|
||||||
... "type": "fixed_size_list",
|
|
||||||
... "value_type": {"type": "float32"},
|
|
||||||
... "width": 512,
|
|
||||||
... },
|
|
||||||
... "nullable": False,
|
|
||||||
... },
|
|
||||||
... {
|
|
||||||
... "name": "struct",
|
|
||||||
... "type": {
|
|
||||||
... "type": "struct",
|
|
||||||
... "fields": [
|
|
||||||
... {"name": "a", "type": {"type": "string"}, "nullable": True},
|
|
||||||
... {"name": "b", "type": {"type": "float32"}, "nullable": True},
|
|
||||||
... ],
|
|
||||||
... },
|
|
||||||
... "nullable": True,
|
|
||||||
... },
|
|
||||||
... ],
|
|
||||||
... "metadata": {"key": "value"},
|
|
||||||
... }
|
|
||||||
|
|
||||||
"""
|
|
||||||
fields = []
|
|
||||||
for name in schema.names:
|
|
||||||
field = schema.field(name)
|
|
||||||
fields.append(_field_to_dict(field))
|
|
||||||
json_schema = {
|
|
||||||
"fields": fields,
|
|
||||||
"metadata": {
|
|
||||||
k.decode("utf-8"): v.decode("utf-8") for (k, v) in schema.metadata.items()
|
|
||||||
}
|
|
||||||
if schema.metadata is not None
|
|
||||||
else {},
|
|
||||||
}
|
|
||||||
return json_schema
|
|
||||||
|
|
||||||
|
|
||||||
def _dict_to_type(dt: Dict[str, Any]) -> pa.DataType:
|
|
||||||
type_name = dt["type"]
|
|
||||||
try:
|
|
||||||
return {
|
|
||||||
"boolean": pa.bool_(),
|
|
||||||
"int8": pa.int8(),
|
|
||||||
"int16": pa.int16(),
|
|
||||||
"int32": pa.int32(),
|
|
||||||
"int64": pa.int64(),
|
|
||||||
"uint8": pa.uint8(),
|
|
||||||
"uint16": pa.uint16(),
|
|
||||||
"uint32": pa.uint32(),
|
|
||||||
"uint64": pa.uint64(),
|
|
||||||
"float16": pa.float16(),
|
|
||||||
"float32": pa.float32(),
|
|
||||||
"float64": pa.float64(),
|
|
||||||
"string": pa.string(),
|
|
||||||
"binary": pa.binary(),
|
|
||||||
"large_string": pa.large_string(),
|
|
||||||
"large_binary": pa.large_binary(),
|
|
||||||
"date32": pa.date32(),
|
|
||||||
"date64": pa.date64(),
|
|
||||||
}[type_name]
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if type_name == "fixed_size_binary":
|
|
||||||
return pa.binary(dt["width"])
|
|
||||||
elif type_name == "fixed_size_list":
|
|
||||||
return pa.list_(_dict_to_type(dt["value_type"]), dt["width"])
|
|
||||||
elif type_name == "list":
|
|
||||||
return pa.list_(_dict_to_type(dt["value_type"]))
|
|
||||||
elif type_name == "struct":
|
|
||||||
fields = []
|
|
||||||
for field in dt["fields"]:
|
|
||||||
fields.append(_dict_to_field(field))
|
|
||||||
return pa.struct(fields)
|
|
||||||
elif type_name == "dictionary":
|
|
||||||
return pa.dictionary(
|
|
||||||
_dict_to_type(dt["index_type"]), _dict_to_type(dt["value_type"])
|
|
||||||
)
|
|
||||||
elif type_name.startswith("time32:"):
|
|
||||||
return pa.time32(type_name.split(":")[1])
|
|
||||||
elif type_name.startswith("time64:"):
|
|
||||||
return pa.time64(type_name.split(":")[1])
|
|
||||||
elif type_name.startswith("timestamp:"):
|
|
||||||
fields = type_name.split(":")
|
|
||||||
unit = fields[1]
|
|
||||||
tz = fields[2] if len(fields) > 2 else None
|
|
||||||
return pa.timestamp(unit, tz)
|
|
||||||
raise TypeError(f"Unsupported type: {dt}")
|
|
||||||
|
|
||||||
|
|
||||||
def _dict_to_field(field: Dict[str, Any]) -> pa.Field:
|
|
||||||
name = field["name"]
|
|
||||||
nullable = field["nullable"] if "nullable" in field else True
|
|
||||||
dt = _dict_to_type(field["type"])
|
|
||||||
metadata = field.get("metadata", None)
|
|
||||||
return pa.field(name, dt, nullable, metadata)
|
|
||||||
|
|
||||||
|
|
||||||
def dict_to_schema(json: Dict[str, Any]) -> pa.Schema:
|
|
||||||
"""Reconstruct a PyArrow Schema from a JSON dict.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
json : Dict[str, Any]
|
|
||||||
The JSON dict to reconstruct Schema from.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
A PyArrow Schema.
|
|
||||||
"""
|
|
||||||
fields = []
|
|
||||||
for field in json["fields"]:
|
|
||||||
fields.append(_dict_to_field(field))
|
|
||||||
metadata = {
|
|
||||||
k.encode("utf-8"): v.encode("utf-8")
|
|
||||||
for (k, v) in json.get("metadata", {}).items()
|
|
||||||
}
|
|
||||||
return pa.schema(fields, metadata)
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ from __future__ import annotations
|
|||||||
import os
|
import os
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
from typing import List, Union
|
from typing import Iterable, List, Union
|
||||||
|
|
||||||
import lance
|
import lance
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -44,7 +44,7 @@ def _sanitize_data(data, schema, on_bad_vectors, fill_value):
|
|||||||
data = _sanitize_schema(
|
data = _sanitize_schema(
|
||||||
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
|
||||||
)
|
)
|
||||||
if not isinstance(data, pa.Table):
|
if not isinstance(data, (pa.Table, Iterable)):
|
||||||
raise TypeError(f"Unsupported data type: {type(data)}")
|
raise TypeError(f"Unsupported data type: {type(data)}")
|
||||||
return data
|
return data
|
||||||
|
|
||||||
@@ -74,7 +74,6 @@ class Table(ABC):
|
|||||||
Can append new data with [Table.add()][lancedb.table.Table.add].
|
Can append new data with [Table.add()][lancedb.table.Table.add].
|
||||||
|
|
||||||
>>> table.add([{"vector": [0.5, 1.3], "b": 4}])
|
>>> table.add([{"vector": [0.5, 1.3], "b": 4}])
|
||||||
2
|
|
||||||
|
|
||||||
Can query the table with [Table.search][lancedb.table.Table.search].
|
Can query the table with [Table.search][lancedb.table.Table.search].
|
||||||
|
|
||||||
@@ -151,7 +150,7 @@ class Table(ABC):
|
|||||||
mode: str = "append",
|
mode: str = "append",
|
||||||
on_bad_vectors: str = "error",
|
on_bad_vectors: str = "error",
|
||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
) -> int:
|
):
|
||||||
"""Add more data to the [Table](Table).
|
"""Add more data to the [Table](Table).
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
@@ -167,10 +166,6 @@ class Table(ABC):
|
|||||||
fill_value: float, default 0.
|
fill_value: float, default 0.
|
||||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
int
|
|
||||||
The number of vectors in the table.
|
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@@ -202,6 +197,51 @@ class Table(ABC):
|
|||||||
def _execute_query(self, query: Query) -> pa.Table:
|
def _execute_query(self, query: Query) -> pa.Table:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def delete(self, where: str):
|
||||||
|
"""Delete rows from the table.
|
||||||
|
|
||||||
|
This can be used to delete a single row, many rows, all rows, or
|
||||||
|
sometimes no rows (if your predicate matches nothing).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
where: str
|
||||||
|
The SQL where clause to use when deleting rows. For example, 'x = 2'
|
||||||
|
or 'x IN (1, 2, 3)'. The filter must not be empty, or it will error.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> import lancedb
|
||||||
|
>>> import pandas as pd
|
||||||
|
>>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]})
|
||||||
|
>>> db = lancedb.connect("./.lancedb")
|
||||||
|
>>> table = db.create_table("my_table", data)
|
||||||
|
>>> table.to_pandas()
|
||||||
|
x vector
|
||||||
|
0 1 [1.0, 2.0]
|
||||||
|
1 2 [3.0, 4.0]
|
||||||
|
2 3 [5.0, 6.0]
|
||||||
|
>>> table.delete("x = 2")
|
||||||
|
>>> table.to_pandas()
|
||||||
|
x vector
|
||||||
|
0 1 [1.0, 2.0]
|
||||||
|
1 3 [5.0, 6.0]
|
||||||
|
|
||||||
|
If you have a list of values to delete, you can combine them into a
|
||||||
|
stringified list and use the `IN` operator:
|
||||||
|
|
||||||
|
>>> to_remove = [1, 5]
|
||||||
|
>>> to_remove = ", ".join([str(v) for v in to_remove])
|
||||||
|
>>> to_remove
|
||||||
|
'1, 5'
|
||||||
|
>>> table.delete(f"x IN ({to_remove})")
|
||||||
|
>>> table.to_pandas()
|
||||||
|
x vector
|
||||||
|
0 3 [5.0, 6.0]
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
class LanceTable(Table):
|
class LanceTable(Table):
|
||||||
"""
|
"""
|
||||||
@@ -262,7 +302,6 @@ class LanceTable(Table):
|
|||||||
vector type
|
vector type
|
||||||
0 [1.1, 0.9] vector
|
0 [1.1, 0.9] vector
|
||||||
>>> table.add([{"vector": [0.5, 0.2], "type": "vector"}])
|
>>> table.add([{"vector": [0.5, 0.2], "type": "vector"}])
|
||||||
2
|
|
||||||
>>> table.version
|
>>> table.version
|
||||||
2
|
2
|
||||||
>>> table.checkout(1)
|
>>> table.checkout(1)
|
||||||
@@ -364,7 +403,7 @@ class LanceTable(Table):
|
|||||||
mode: str = "append",
|
mode: str = "append",
|
||||||
on_bad_vectors: str = "error",
|
on_bad_vectors: str = "error",
|
||||||
fill_value: float = 0.0,
|
fill_value: float = 0.0,
|
||||||
) -> int:
|
):
|
||||||
"""Add data to the table.
|
"""Add data to the table.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
@@ -391,7 +430,6 @@ class LanceTable(Table):
|
|||||||
)
|
)
|
||||||
lance.write_dataset(data, self._dataset_uri, mode=mode)
|
lance.write_dataset(data, self._dataset_uri, mode=mode)
|
||||||
self._reset_dataset()
|
self._reset_dataset()
|
||||||
return len(self)
|
|
||||||
|
|
||||||
def search(
|
def search(
|
||||||
self, query: Union[VEC, str], vector_column_name=VECTOR_COLUMN_NAME
|
self, query: Union[VEC, str], vector_column_name=VECTOR_COLUMN_NAME
|
||||||
@@ -483,44 +521,21 @@ class LanceTable(Table):
|
|||||||
if schema is None:
|
if schema is None:
|
||||||
raise ValueError("Either data or schema must be provided")
|
raise ValueError("Either data or schema must be provided")
|
||||||
data = pa.Table.from_pylist([], schema=schema)
|
data = pa.Table.from_pylist([], schema=schema)
|
||||||
lance.write_dataset(data, tbl._dataset_uri, mode=mode)
|
lance.write_dataset(data, tbl._dataset_uri, schema=schema, mode=mode)
|
||||||
return LanceTable(db, name)
|
return LanceTable(db, name)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def open(cls, db, name):
|
def open(cls, db, name):
|
||||||
tbl = cls(db, name)
|
tbl = cls(db, name)
|
||||||
if not os.path.exists(tbl._dataset_uri):
|
fs, path = pa.fs.FileSystem.from_uri(tbl._dataset_uri)
|
||||||
|
file_info = fs.get_file_info(path)
|
||||||
|
if file_info.type != pa.fs.FileType.Directory:
|
||||||
raise FileNotFoundError(
|
raise FileNotFoundError(
|
||||||
f"Table {name} does not exist. Please first call db.create_table({name}, data)"
|
f"Table {name} does not exist. Please first call db.create_table({name}, data)"
|
||||||
)
|
)
|
||||||
return tbl
|
return tbl
|
||||||
|
|
||||||
def delete(self, where: str):
|
def delete(self, where: str):
|
||||||
"""Delete rows from the table.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
where: str
|
|
||||||
The SQL where clause to use when deleting rows.
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
>>> import lancedb
|
|
||||||
>>> import pandas as pd
|
|
||||||
>>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]})
|
|
||||||
>>> db = lancedb.connect("./.lancedb")
|
|
||||||
>>> table = db.create_table("my_table", data)
|
|
||||||
>>> table.to_pandas()
|
|
||||||
x vector
|
|
||||||
0 1 [1.0, 2.0]
|
|
||||||
1 2 [3.0, 4.0]
|
|
||||||
2 3 [5.0, 6.0]
|
|
||||||
>>> table.delete("x = 2")
|
|
||||||
>>> table.to_pandas()
|
|
||||||
x vector
|
|
||||||
0 1 [1.0, 2.0]
|
|
||||||
1 3 [5.0, 6.0]
|
|
||||||
"""
|
|
||||||
self._dataset.delete(where)
|
self._dataset.delete(where)
|
||||||
|
|
||||||
def _execute_query(self, query: Query) -> pa.Table:
|
def _execute_query(self, query: Query) -> pa.Table:
|
||||||
|
|||||||
@@ -11,8 +11,13 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
from typing import Tuple
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import pyarrow as pa
|
||||||
|
import pyarrow.fs as pa_fs
|
||||||
|
|
||||||
|
|
||||||
def get_uri_scheme(uri: str) -> str:
|
def get_uri_scheme(uri: str) -> str:
|
||||||
"""
|
"""
|
||||||
@@ -59,3 +64,14 @@ def get_uri_location(uri: str) -> str:
|
|||||||
return parsed.path
|
return parsed.path
|
||||||
else:
|
else:
|
||||||
return parsed.netloc + parsed.path
|
return parsed.netloc + parsed.path
|
||||||
|
|
||||||
|
|
||||||
|
def fs_from_uri(uri: str) -> Tuple[pa_fs.FileSystem, str]:
|
||||||
|
"""
|
||||||
|
Get a PyArrow FileSystem from a URI, handling extra environment variables.
|
||||||
|
"""
|
||||||
|
if get_uri_scheme(uri) == "s3":
|
||||||
|
if os.environ["AWS_ENDPOINT"]:
|
||||||
|
uri += "?endpoint_override=" + os.environ["AWS_ENDPOINT"]
|
||||||
|
|
||||||
|
return pa_fs.FileSystem.from_uri(uri)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.1.10"
|
version = "0.1.13"
|
||||||
dependencies = ["pylance~=0.5.0", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic>=2", "attr"]
|
dependencies = ["pylance~=0.5.8", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"]
|
||||||
description = "lancedb"
|
description = "lancedb"
|
||||||
authors = [
|
authors = [
|
||||||
{ name = "LanceDB Devs", email = "dev@lancedb.com" },
|
{ name = "LanceDB Devs", email = "dev@lancedb.com" },
|
||||||
@@ -52,3 +52,6 @@ requires = [
|
|||||||
"wheel",
|
"wheel",
|
||||||
]
|
]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
profile = "black"
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import pyarrow as pa
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import lancedb
|
import lancedb
|
||||||
@@ -75,6 +76,32 @@ def test_ingest_pd(tmp_path):
|
|||||||
assert db.open_table("test").name == db["test"].name
|
assert db.open_table("test").name == db["test"].name
|
||||||
|
|
||||||
|
|
||||||
|
def test_ingest_record_batch_iterator(tmp_path):
|
||||||
|
def batch_reader():
|
||||||
|
for i in range(5):
|
||||||
|
yield pa.RecordBatch.from_arrays(
|
||||||
|
[
|
||||||
|
pa.array([[3.1, 4.1], [5.9, 26.5]]),
|
||||||
|
pa.array(["foo", "bar"]),
|
||||||
|
pa.array([10.0, 20.0]),
|
||||||
|
],
|
||||||
|
["vector", "item", "price"],
|
||||||
|
)
|
||||||
|
|
||||||
|
db = lancedb.connect(tmp_path)
|
||||||
|
tbl = db.create_table(
|
||||||
|
"test",
|
||||||
|
batch_reader(),
|
||||||
|
schema=pa.schema(
|
||||||
|
[
|
||||||
|
pa.field("vector", pa.list_(pa.float32())),
|
||||||
|
pa.field("item", pa.utf8()),
|
||||||
|
pa.field("price", pa.float32()),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_create_mode(tmp_path):
|
def test_create_mode(tmp_path):
|
||||||
db = lancedb.connect(tmp_path)
|
db = lancedb.connect(tmp_path)
|
||||||
data = pd.DataFrame(
|
data = pd.DataFrame(
|
||||||
@@ -131,6 +158,9 @@ def test_empty_or_nonexistent_table(tmp_path):
|
|||||||
with pytest.raises(Exception):
|
with pytest.raises(Exception):
|
||||||
db.open_table("does_not_exist")
|
db.open_table("does_not_exist")
|
||||||
|
|
||||||
|
schema = pa.schema([pa.field("a", pa.int32())])
|
||||||
|
db.create_table("test", schema=schema)
|
||||||
|
|
||||||
|
|
||||||
def test_replace_index(tmp_path):
|
def test_replace_index(tmp_path):
|
||||||
db = lancedb.connect(uri=tmp_path)
|
db = lancedb.connect(uri=tmp_path)
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import pyarrow as pa
|
|||||||
import pydantic
|
import pydantic
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from lancedb.pydantic import pydantic_to_schema, vector
|
from lancedb.pydantic import PYDANTIC_VERSION, pydantic_to_schema, vector
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
@@ -111,10 +111,16 @@ def test_fixed_size_list_field():
|
|||||||
li: List[int]
|
li: List[int]
|
||||||
|
|
||||||
data = TestModel(vec=list(range(16)), li=[1, 2, 3])
|
data = TestModel(vec=list(range(16)), li=[1, 2, 3])
|
||||||
assert json.loads(data.model_dump_json()) == {
|
if PYDANTIC_VERSION >= (2,):
|
||||||
"vec": list(range(16)),
|
assert json.loads(data.model_dump_json()) == {
|
||||||
"li": [1, 2, 3],
|
"vec": list(range(16)),
|
||||||
}
|
"li": [1, 2, 3],
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
assert data.dict() == {
|
||||||
|
"vec": list(range(16)),
|
||||||
|
"li": [1, 2, 3],
|
||||||
|
}
|
||||||
|
|
||||||
schema = pydantic_to_schema(TestModel)
|
schema = pydantic_to_schema(TestModel)
|
||||||
assert schema == pa.schema(
|
assert schema == pa.schema(
|
||||||
@@ -124,7 +130,11 @@ def test_fixed_size_list_field():
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
json_schema = TestModel.model_json_schema()
|
if PYDANTIC_VERSION >= (2,):
|
||||||
|
json_schema = TestModel.model_json_schema()
|
||||||
|
else:
|
||||||
|
json_schema = TestModel.schema()
|
||||||
|
|
||||||
assert json_schema == {
|
assert json_schema == {
|
||||||
"properties": {
|
"properties": {
|
||||||
"vec": {
|
"vec": {
|
||||||
|
|||||||
@@ -119,6 +119,7 @@ def test_query_builder_with_different_vector_column():
|
|||||||
columns=["b"],
|
columns=["b"],
|
||||||
nprobes=20,
|
nprobes=20,
|
||||||
refine_factor=None,
|
refine_factor=None,
|
||||||
|
vector_column="foo_vector",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,109 +0,0 @@
|
|||||||
# Copyright 2023 LanceDB Developers
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import pyarrow as pa
|
|
||||||
|
|
||||||
import lancedb
|
|
||||||
from lancedb.schema import dict_to_schema, schema_to_dict
|
|
||||||
|
|
||||||
|
|
||||||
def test_schema_to_dict():
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", lancedb.vector(512), nullable=False),
|
|
||||||
pa.field(
|
|
||||||
"struct",
|
|
||||||
pa.struct(
|
|
||||||
[
|
|
||||||
pa.field("a", pa.utf8()),
|
|
||||||
pa.field("b", pa.float32()),
|
|
||||||
]
|
|
||||||
),
|
|
||||||
True,
|
|
||||||
),
|
|
||||||
pa.field("d", pa.dictionary(pa.int64(), pa.utf8()), False),
|
|
||||||
],
|
|
||||||
metadata={"key": "value"},
|
|
||||||
)
|
|
||||||
|
|
||||||
json_schema = schema_to_dict(schema)
|
|
||||||
assert json_schema == {
|
|
||||||
"fields": [
|
|
||||||
{"name": "id", "type": {"type": "int64"}, "nullable": True},
|
|
||||||
{
|
|
||||||
"name": "vector",
|
|
||||||
"type": {
|
|
||||||
"type": "fixed_size_list",
|
|
||||||
"value_type": {"type": "float32"},
|
|
||||||
"width": 512,
|
|
||||||
},
|
|
||||||
"nullable": False,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "struct",
|
|
||||||
"type": {
|
|
||||||
"type": "struct",
|
|
||||||
"fields": [
|
|
||||||
{"name": "a", "type": {"type": "string"}, "nullable": True},
|
|
||||||
{"name": "b", "type": {"type": "float32"}, "nullable": True},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
"nullable": True,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "d",
|
|
||||||
"type": {
|
|
||||||
"type": "dictionary",
|
|
||||||
"index_type": {"type": "int64"},
|
|
||||||
"value_type": {"type": "string"},
|
|
||||||
},
|
|
||||||
"nullable": False,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
"metadata": {"key": "value"},
|
|
||||||
}
|
|
||||||
|
|
||||||
actual_schema = dict_to_schema(json_schema)
|
|
||||||
assert actual_schema == schema
|
|
||||||
|
|
||||||
|
|
||||||
def test_temporal_types():
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("t32", pa.time32("s")),
|
|
||||||
pa.field("t32ms", pa.time32("ms")),
|
|
||||||
pa.field("t64", pa.time64("ns")),
|
|
||||||
pa.field("ts", pa.timestamp("s")),
|
|
||||||
pa.field("ts_us_tz", pa.timestamp("us", tz="America/New_York")),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
json_schema = schema_to_dict(schema)
|
|
||||||
|
|
||||||
assert json_schema == {
|
|
||||||
"fields": [
|
|
||||||
{"name": "t32", "type": {"type": "time32:s"}, "nullable": True},
|
|
||||||
{"name": "t32ms", "type": {"type": "time32:ms"}, "nullable": True},
|
|
||||||
{"name": "t64", "type": {"type": "time64:ns"}, "nullable": True},
|
|
||||||
{"name": "ts", "type": {"type": "timestamp:s:"}, "nullable": True},
|
|
||||||
{
|
|
||||||
"name": "ts_us_tz",
|
|
||||||
"type": {"type": "timestamp:us:America/New_York"},
|
|
||||||
"nullable": True,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
"metadata": {},
|
|
||||||
}
|
|
||||||
|
|
||||||
actual_schema = dict_to_schema(json_schema)
|
|
||||||
assert actual_schema == schema
|
|
||||||
@@ -139,8 +139,8 @@ def _add(table, schema):
|
|||||||
# table = LanceTable(db, "test")
|
# table = LanceTable(db, "test")
|
||||||
assert len(table) == 2
|
assert len(table) == 2
|
||||||
|
|
||||||
count = table.add([{"vector": [6.3, 100.5], "item": "new", "price": 30.0}])
|
table.add([{"vector": [6.3, 100.5], "item": "new", "price": 30.0}])
|
||||||
assert count == 3
|
assert len(table) == 3
|
||||||
|
|
||||||
expected = pa.Table.from_arrays(
|
expected = pa.Table.from_arrays(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "vectordb-node"
|
name = "vectordb-node"
|
||||||
version = "0.1.13"
|
version = "0.1.15"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
@@ -15,6 +15,7 @@ arrow-ipc = { workspace = true }
|
|||||||
arrow-schema = { workspace = true }
|
arrow-schema = { workspace = true }
|
||||||
once_cell = "1"
|
once_cell = "1"
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
|
half = { workspace = true }
|
||||||
lance = { workspace = true }
|
lance = { workspace = true }
|
||||||
vectordb = { path = "../../vectordb" }
|
vectordb = { path = "../../vectordb" }
|
||||||
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ use arrow_array::{Float32Array, RecordBatchIterator};
|
|||||||
use arrow_ipc::writer::FileWriter;
|
use arrow_ipc::writer::FileWriter;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use futures::{TryFutureExt, TryStreamExt};
|
use futures::{TryFutureExt, TryStreamExt};
|
||||||
use lance::dataset::{ReadParams, WriteMode, WriteParams};
|
use lance::dataset::{WriteMode, WriteParams};
|
||||||
use lance::index::vector::MetricType;
|
use lance::index::vector::MetricType;
|
||||||
use lance::io::object_store::ObjectStoreParams;
|
use lance::io::object_store::ObjectStoreParams;
|
||||||
use neon::prelude::*;
|
use neon::prelude::*;
|
||||||
@@ -33,7 +33,7 @@ use tokio::runtime::Runtime;
|
|||||||
|
|
||||||
use vectordb::database::Database;
|
use vectordb::database::Database;
|
||||||
use vectordb::error::Error;
|
use vectordb::error::Error;
|
||||||
use vectordb::table::{OpenTableParams, Table};
|
use vectordb::table::{ReadParams, Table};
|
||||||
|
|
||||||
use crate::arrow::arrow_buffer_to_record_batch;
|
use crate::arrow::arrow_buffer_to_record_batch;
|
||||||
|
|
||||||
@@ -177,7 +177,7 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
Err(err) => return err,
|
Err(err) => return err,
|
||||||
};
|
};
|
||||||
|
|
||||||
let param = ReadParams {
|
let params = ReadParams {
|
||||||
store_options: Some(ObjectStoreParams {
|
store_options: Some(ObjectStoreParams {
|
||||||
aws_credentials: aws_creds,
|
aws_credentials: aws_creds,
|
||||||
..ObjectStoreParams::default()
|
..ObjectStoreParams::default()
|
||||||
@@ -191,14 +191,7 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
|||||||
|
|
||||||
let (deferred, promise) = cx.promise();
|
let (deferred, promise) = cx.promise();
|
||||||
rt.spawn(async move {
|
rt.spawn(async move {
|
||||||
let table_rst = database
|
let table_rst = database.open_table_with_params(&table_name, &params).await;
|
||||||
.open_table_with_params(
|
|
||||||
&table_name,
|
|
||||||
OpenTableParams {
|
|
||||||
open_table_params: param,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
deferred.settle_with(&channel, move |mut cx| {
|
deferred.settle_with(&channel, move |mut cx| {
|
||||||
let table = Arc::new(Mutex::new(
|
let table = Arc::new(Mutex::new(
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "vectordb"
|
name = "vectordb"
|
||||||
version = "0.1.13"
|
version = "0.1.15"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
@@ -13,6 +13,7 @@ arrow-data = { workspace = true }
|
|||||||
arrow-schema = { workspace = true }
|
arrow-schema = { workspace = true }
|
||||||
object_store = { workspace = true }
|
object_store = { workspace = true }
|
||||||
snafu = "0.7.4"
|
snafu = "0.7.4"
|
||||||
|
half = { workspace = true }
|
||||||
lance = { workspace = true }
|
lance = { workspace = true }
|
||||||
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
||||||
|
|
||||||
|
|||||||
@@ -20,13 +20,16 @@ use lance::dataset::WriteParams;
|
|||||||
use lance::io::object_store::ObjectStore;
|
use lance::io::object_store::ObjectStore;
|
||||||
use snafu::prelude::*;
|
use snafu::prelude::*;
|
||||||
|
|
||||||
use crate::error::{CreateDirSnafu, Result};
|
use crate::error::{CreateDirSnafu, InvalidTableNameSnafu, Result};
|
||||||
use crate::table::{OpenTableParams, Table};
|
use crate::table::{ReadParams, Table};
|
||||||
|
|
||||||
|
pub const LANCE_FILE_EXTENSION: &str = "lance";
|
||||||
|
|
||||||
pub struct Database {
|
pub struct Database {
|
||||||
object_store: ObjectStore,
|
object_store: ObjectStore,
|
||||||
|
|
||||||
pub(crate) uri: String,
|
pub(crate) uri: String,
|
||||||
|
pub(crate) base_path: object_store::path::Path,
|
||||||
}
|
}
|
||||||
|
|
||||||
const LANCE_EXTENSION: &str = "lance";
|
const LANCE_EXTENSION: &str = "lance";
|
||||||
@@ -43,12 +46,13 @@ impl Database {
|
|||||||
///
|
///
|
||||||
/// * A [Database] object.
|
/// * A [Database] object.
|
||||||
pub async fn connect(uri: &str) -> Result<Database> {
|
pub async fn connect(uri: &str) -> Result<Database> {
|
||||||
let (object_store, _) = ObjectStore::from_uri(uri).await?;
|
let (object_store, base_path) = ObjectStore::from_uri(uri).await?;
|
||||||
if object_store.is_local() {
|
if object_store.is_local() {
|
||||||
Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
|
Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
|
||||||
}
|
}
|
||||||
Ok(Database {
|
Ok(Database {
|
||||||
uri: uri.to_string(),
|
uri: uri.to_string(),
|
||||||
|
base_path,
|
||||||
object_store,
|
object_store,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -57,7 +61,7 @@ impl Database {
|
|||||||
fn try_create_dir(path: &str) -> core::result::Result<(), std::io::Error> {
|
fn try_create_dir(path: &str) -> core::result::Result<(), std::io::Error> {
|
||||||
let path = Path::new(path);
|
let path = Path::new(path);
|
||||||
if !path.try_exists()? {
|
if !path.try_exists()? {
|
||||||
create_dir_all(&path)?;
|
create_dir_all(path)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -70,23 +74,18 @@ impl Database {
|
|||||||
pub async fn table_names(&self) -> Result<Vec<String>> {
|
pub async fn table_names(&self) -> Result<Vec<String>> {
|
||||||
let f = self
|
let f = self
|
||||||
.object_store
|
.object_store
|
||||||
.read_dir(self.uri.as_str())
|
.read_dir(self.base_path.clone())
|
||||||
.await?
|
.await?
|
||||||
.iter()
|
.iter()
|
||||||
.map(|fname| Path::new(fname))
|
.map(Path::new)
|
||||||
.filter(|path| {
|
.filter(|path| {
|
||||||
let is_lance = path
|
let is_lance = path
|
||||||
.extension()
|
.extension()
|
||||||
.map(|e| e.to_str().map(|e| e == LANCE_EXTENSION))
|
.and_then(|e| e.to_str())
|
||||||
.flatten();
|
.map(|e| e == LANCE_EXTENSION);
|
||||||
is_lance.unwrap_or(false)
|
is_lance.unwrap_or(false)
|
||||||
})
|
})
|
||||||
.map(|p| {
|
.filter_map(|p| p.file_stem().and_then(|s| s.to_str().map(String::from)))
|
||||||
p.file_stem()
|
|
||||||
.map(|s| s.to_str().map(|s| String::from(s)))
|
|
||||||
.flatten()
|
|
||||||
})
|
|
||||||
.flatten()
|
|
||||||
.collect();
|
.collect();
|
||||||
Ok(f)
|
Ok(f)
|
||||||
}
|
}
|
||||||
@@ -103,7 +102,8 @@ impl Database {
|
|||||||
batches: impl RecordBatchReader + Send + 'static,
|
batches: impl RecordBatchReader + Send + 'static,
|
||||||
params: Option<WriteParams>,
|
params: Option<WriteParams>,
|
||||||
) -> Result<Table> {
|
) -> Result<Table> {
|
||||||
Table::create(&self.uri, name, batches, params).await
|
let table_uri = self.table_uri(name)?;
|
||||||
|
Table::create(&table_uri, name, batches, params).await
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Open a table in the database.
|
/// Open a table in the database.
|
||||||
@@ -115,7 +115,7 @@ impl Database {
|
|||||||
///
|
///
|
||||||
/// * A [Table] object.
|
/// * A [Table] object.
|
||||||
pub async fn open_table(&self, name: &str) -> Result<Table> {
|
pub async fn open_table(&self, name: &str) -> Result<Table> {
|
||||||
self.open_table_with_params(name, OpenTableParams::default())
|
self.open_table_with_params(name, &ReadParams::default())
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -128,12 +128,9 @@ impl Database {
|
|||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
/// * A [Table] object.
|
/// * A [Table] object.
|
||||||
pub async fn open_table_with_params(
|
pub async fn open_table_with_params(&self, name: &str, params: &ReadParams) -> Result<Table> {
|
||||||
&self,
|
let table_uri = self.table_uri(name)?;
|
||||||
name: &str,
|
Table::open_with_params(&table_uri, name, params).await
|
||||||
params: OpenTableParams,
|
|
||||||
) -> Result<Table> {
|
|
||||||
Table::open_with_params(&self.uri, name, params).await
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Drop a table in the database.
|
/// Drop a table in the database.
|
||||||
@@ -141,10 +138,23 @@ impl Database {
|
|||||||
/// # Arguments
|
/// # Arguments
|
||||||
/// * `name` - The name of the table.
|
/// * `name` - The name of the table.
|
||||||
pub async fn drop_table(&self, name: &str) -> Result<()> {
|
pub async fn drop_table(&self, name: &str) -> Result<()> {
|
||||||
let dir_name = format!("{}/{}.{}", self.uri, name, LANCE_EXTENSION);
|
let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
|
||||||
self.object_store.remove_dir_all(dir_name).await?;
|
let full_path = self.base_path.child(dir_name.clone());
|
||||||
|
self.object_store.remove_dir_all(full_path).await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the URI of a table in the database.
|
||||||
|
fn table_uri(&self, name: &str) -> Result<String> {
|
||||||
|
let path = Path::new(&self.uri);
|
||||||
|
let table_uri = path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
|
||||||
|
|
||||||
|
let uri = table_uri
|
||||||
|
.as_path()
|
||||||
|
.to_str()
|
||||||
|
.context(InvalidTableNameSnafu { name })?;
|
||||||
|
Ok(uri.to_string())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
@@ -35,6 +35,12 @@ pub struct IvfPQIndexBuilder {
|
|||||||
|
|
||||||
impl IvfPQIndexBuilder {
|
impl IvfPQIndexBuilder {
|
||||||
pub fn new() -> IvfPQIndexBuilder {
|
pub fn new() -> IvfPQIndexBuilder {
|
||||||
|
Default::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for IvfPQIndexBuilder {
|
||||||
|
fn default() -> Self {
|
||||||
IvfPQIndexBuilder {
|
IvfPQIndexBuilder {
|
||||||
column: None,
|
column: None,
|
||||||
index_name: None,
|
index_name: None,
|
||||||
|
|||||||
@@ -12,21 +12,22 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use std::path::Path;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_array::{Float32Array, RecordBatchReader};
|
use arrow_array::{Float32Array, RecordBatchReader};
|
||||||
use lance::dataset::{Dataset, ReadParams, WriteParams};
|
use arrow_schema::SchemaRef;
|
||||||
|
use lance::dataset::{Dataset, WriteParams};
|
||||||
use lance::index::IndexType;
|
use lance::index::IndexType;
|
||||||
use snafu::prelude::*;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::error::{Error, InvalidTableNameSnafu, Result};
|
use crate::error::{Error, Result};
|
||||||
use crate::index::vector::VectorIndexBuilder;
|
use crate::index::vector::VectorIndexBuilder;
|
||||||
use crate::query::Query;
|
use crate::query::Query;
|
||||||
use crate::WriteMode;
|
use crate::WriteMode;
|
||||||
|
|
||||||
|
pub use lance::dataset::ReadParams;
|
||||||
|
|
||||||
pub const VECTOR_COLUMN_NAME: &str = "vector";
|
pub const VECTOR_COLUMN_NAME: &str = "vector";
|
||||||
pub const LANCE_FILE_EXTENSION: &str = "lance";
|
|
||||||
|
|
||||||
/// A table in a LanceDB database.
|
/// A table in a LanceDB database.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -42,24 +43,25 @@ impl std::fmt::Display for Table {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct OpenTableParams {
|
|
||||||
pub open_table_params: ReadParams,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Table {
|
impl Table {
|
||||||
/// Opens an existing Table
|
/// Opens an existing Table
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `base_path` - The base path where the table is located
|
/// * `uri` - The uri to a [Table]
|
||||||
/// * `name` The Table name
|
/// * `name` - The table name
|
||||||
///
|
///
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
/// * A [Table] object.
|
/// * A [Table] object.
|
||||||
pub async fn open(base_uri: &str, name: &str) -> Result<Self> {
|
pub async fn open(uri: &str) -> Result<Self> {
|
||||||
Self::open_with_params(base_uri, name, OpenTableParams::default()).await
|
let name = Self::get_table_name(uri)?;
|
||||||
|
Self::open_with_params(uri, &name, &ReadParams::default()).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Open an Table with a given name.
|
||||||
|
pub async fn open_with_name(uri: &str, name: &str) -> Result<Self> {
|
||||||
|
Self::open_with_params(uri, name, &ReadParams::default()).await
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Opens an existing Table
|
/// Opens an existing Table
|
||||||
@@ -68,25 +70,13 @@ impl Table {
|
|||||||
///
|
///
|
||||||
/// * `base_path` - The base path where the table is located
|
/// * `base_path` - The base path where the table is located
|
||||||
/// * `name` The Table name
|
/// * `name` The Table name
|
||||||
/// * `params` The [OpenTableParams] to use when opening the table
|
/// * `params` The [ReadParams] to use when opening the table
|
||||||
///
|
///
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
/// * A [Table] object.
|
/// * A [Table] object.
|
||||||
pub async fn open_with_params(
|
pub async fn open_with_params(uri: &str, name: &str, params: &ReadParams) -> Result<Self> {
|
||||||
base_uri: &str,
|
let dataset = Dataset::open_with_params(uri, params)
|
||||||
name: &str,
|
|
||||||
params: OpenTableParams,
|
|
||||||
) -> Result<Self> {
|
|
||||||
let path = Path::new(base_uri);
|
|
||||||
|
|
||||||
let table_uri = path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
|
|
||||||
let uri = table_uri
|
|
||||||
.as_path()
|
|
||||||
.to_str()
|
|
||||||
.context(InvalidTableNameSnafu { name })?;
|
|
||||||
|
|
||||||
let dataset = Dataset::open_with_params(uri, &params.open_table_params)
|
|
||||||
.await
|
.await
|
||||||
.map_err(|e| match e {
|
.map_err(|e| match e {
|
||||||
lance::Error::DatasetNotFound { .. } => Error::TableNotFound {
|
lance::Error::DatasetNotFound { .. } => Error::TableNotFound {
|
||||||
@@ -103,31 +93,73 @@ impl Table {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checkout a specific version of this [`Table`]
|
||||||
|
///
|
||||||
|
pub async fn checkout(uri: &str, version: u64) -> Result<Self> {
|
||||||
|
let name = Self::get_table_name(uri)?;
|
||||||
|
Self::checkout_with_params(uri, &name, version, &ReadParams::default()).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn checkout_with_name(uri: &str, name: &str, version: u64) -> Result<Self> {
|
||||||
|
Self::checkout_with_params(uri, name, version, &ReadParams::default()).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn checkout_with_params(
|
||||||
|
uri: &str,
|
||||||
|
name: &str,
|
||||||
|
version: u64,
|
||||||
|
params: &ReadParams,
|
||||||
|
) -> Result<Self> {
|
||||||
|
let dataset = Dataset::checkout_with_params(uri, version, params)
|
||||||
|
.await
|
||||||
|
.map_err(|e| match e {
|
||||||
|
lance::Error::DatasetNotFound { .. } => Error::TableNotFound {
|
||||||
|
name: name.to_string(),
|
||||||
|
},
|
||||||
|
e => Error::Lance {
|
||||||
|
message: e.to_string(),
|
||||||
|
},
|
||||||
|
})?;
|
||||||
|
Ok(Table {
|
||||||
|
name: name.to_string(),
|
||||||
|
uri: uri.to_string(),
|
||||||
|
dataset: Arc::new(dataset),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_table_name(uri: &str) -> Result<String> {
|
||||||
|
let path = Path::new(uri);
|
||||||
|
let name = path
|
||||||
|
.file_stem()
|
||||||
|
.ok_or(Error::TableNotFound {
|
||||||
|
name: uri.to_string(),
|
||||||
|
})?
|
||||||
|
.to_str()
|
||||||
|
.ok_or(Error::InvalidTableName {
|
||||||
|
name: uri.to_string(),
|
||||||
|
})?;
|
||||||
|
Ok(name.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
/// Creates a new Table
|
/// Creates a new Table
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
///
|
||||||
/// * `base_path` - The base path where the table is located
|
/// * `uri` - The URI to the table.
|
||||||
/// * `name` The Table name
|
/// * `name` The Table name
|
||||||
/// * `batches` RecordBatch to be saved in the database
|
/// * `batches` RecordBatch to be saved in the database.
|
||||||
|
/// * `params` - Write parameters.
|
||||||
///
|
///
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
/// * A [Table] object.
|
/// * A [Table] object.
|
||||||
pub async fn create(
|
pub async fn create(
|
||||||
base_uri: &str,
|
uri: &str,
|
||||||
name: &str,
|
name: &str,
|
||||||
batches: impl RecordBatchReader + Send + 'static,
|
batches: impl RecordBatchReader + Send + 'static,
|
||||||
params: Option<WriteParams>,
|
params: Option<WriteParams>,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let base_path = Path::new(base_uri);
|
let dataset = Dataset::write(batches, uri, params)
|
||||||
let table_uri = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
|
|
||||||
let uri = table_uri
|
|
||||||
.as_path()
|
|
||||||
.to_str()
|
|
||||||
.context(InvalidTableNameSnafu { name })?
|
|
||||||
.to_string();
|
|
||||||
let dataset = Dataset::write(batches, &uri, params)
|
|
||||||
.await
|
.await
|
||||||
.map_err(|e| match e {
|
.map_err(|e| match e {
|
||||||
lance::Error::DatasetAlreadyExists { .. } => Error::TableAlreadyExists {
|
lance::Error::DatasetAlreadyExists { .. } => Error::TableAlreadyExists {
|
||||||
@@ -139,11 +171,21 @@ impl Table {
|
|||||||
})?;
|
})?;
|
||||||
Ok(Table {
|
Ok(Table {
|
||||||
name: name.to_string(),
|
name: name.to_string(),
|
||||||
uri,
|
uri: uri.to_string(),
|
||||||
dataset: Arc::new(dataset),
|
dataset: Arc::new(dataset),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Schema of this Table.
|
||||||
|
pub fn schema(&self) -> SchemaRef {
|
||||||
|
Arc::new(self.dataset.schema().into())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Version of this Table
|
||||||
|
pub fn version(&self) -> u64 {
|
||||||
|
self.dataset.version().version
|
||||||
|
}
|
||||||
|
|
||||||
/// Create index on the table.
|
/// Create index on the table.
|
||||||
pub async fn create_index(&mut self, index_builder: &impl VectorIndexBuilder) -> Result<()> {
|
pub async fn create_index(&mut self, index_builder: &impl VectorIndexBuilder) -> Result<()> {
|
||||||
use lance::index::DatasetIndexExt;
|
use lance::index::DatasetIndexExt;
|
||||||
@@ -253,14 +295,13 @@ mod tests {
|
|||||||
async fn test_open() {
|
async fn test_open() {
|
||||||
let tmp_dir = tempdir().unwrap();
|
let tmp_dir = tempdir().unwrap();
|
||||||
let dataset_path = tmp_dir.path().join("test.lance");
|
let dataset_path = tmp_dir.path().join("test.lance");
|
||||||
let uri = tmp_dir.path().to_str().unwrap();
|
|
||||||
|
|
||||||
let batches = make_test_batches();
|
let batches = make_test_batches();
|
||||||
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
|
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let table = Table::open(uri, "test").await.unwrap();
|
let table = Table::open(dataset_path.to_str().unwrap()).await.unwrap();
|
||||||
|
|
||||||
assert_eq!(table.name, "test")
|
assert_eq!(table.name, "test")
|
||||||
}
|
}
|
||||||
@@ -269,11 +310,12 @@ mod tests {
|
|||||||
async fn test_open_not_found() {
|
async fn test_open_not_found() {
|
||||||
let tmp_dir = tempdir().unwrap();
|
let tmp_dir = tempdir().unwrap();
|
||||||
let uri = tmp_dir.path().to_str().unwrap();
|
let uri = tmp_dir.path().to_str().unwrap();
|
||||||
let table = Table::open(uri, "test").await;
|
let table = Table::open(uri).await;
|
||||||
assert!(matches!(table.unwrap_err(), Error::TableNotFound { .. }));
|
assert!(matches!(table.unwrap_err(), Error::TableNotFound { .. }));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
#[cfg(not(windows))]
|
||||||
fn test_object_store_path() {
|
fn test_object_store_path() {
|
||||||
use std::path::Path as StdPath;
|
use std::path::Path as StdPath;
|
||||||
let p = StdPath::new("s3://bucket/path/to/file");
|
let p = StdPath::new("s3://bucket/path/to/file");
|
||||||
@@ -350,10 +392,7 @@ mod tests {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
table
|
table.add(new_batches, Some(param)).await.unwrap();
|
||||||
.add(new_batches, Some(param))
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
assert_eq!(table.count_rows().await.unwrap(), 10);
|
assert_eq!(table.count_rows().await.unwrap(), 10);
|
||||||
assert_eq!(table.name, "test");
|
assert_eq!(table.name, "test");
|
||||||
}
|
}
|
||||||
@@ -362,14 +401,14 @@ mod tests {
|
|||||||
async fn test_search() {
|
async fn test_search() {
|
||||||
let tmp_dir = tempdir().unwrap();
|
let tmp_dir = tempdir().unwrap();
|
||||||
let dataset_path = tmp_dir.path().join("test.lance");
|
let dataset_path = tmp_dir.path().join("test.lance");
|
||||||
let uri = tmp_dir.path().to_str().unwrap();
|
let uri = dataset_path.to_str().unwrap();
|
||||||
|
|
||||||
let batches = make_test_batches();
|
let batches = make_test_batches();
|
||||||
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
|
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let table = Table::open(uri, "test").await.unwrap();
|
let table = Table::open(uri).await.unwrap();
|
||||||
|
|
||||||
let vector = Float32Array::from_iter_values([0.1, 0.2]);
|
let vector = Float32Array::from_iter_values([0.1, 0.2]);
|
||||||
let query = table.search(vector.clone());
|
let query = table.search(vector.clone());
|
||||||
@@ -401,7 +440,7 @@ mod tests {
|
|||||||
async fn test_open_table_options() {
|
async fn test_open_table_options() {
|
||||||
let tmp_dir = tempdir().unwrap();
|
let tmp_dir = tempdir().unwrap();
|
||||||
let dataset_path = tmp_dir.path().join("test.lance");
|
let dataset_path = tmp_dir.path().join("test.lance");
|
||||||
let uri = tmp_dir.path().to_str().unwrap();
|
let uri = dataset_path.to_str().unwrap();
|
||||||
|
|
||||||
let batches = make_test_batches();
|
let batches = make_test_batches();
|
||||||
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
|
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
|
||||||
@@ -412,15 +451,12 @@ mod tests {
|
|||||||
|
|
||||||
let mut object_store_params = ObjectStoreParams::default();
|
let mut object_store_params = ObjectStoreParams::default();
|
||||||
object_store_params.object_store_wrapper = Some(wrapper.clone());
|
object_store_params.object_store_wrapper = Some(wrapper.clone());
|
||||||
let param = OpenTableParams {
|
let param = ReadParams {
|
||||||
open_table_params: ReadParams {
|
store_options: Some(object_store_params),
|
||||||
store_options: Some(object_store_params),
|
..Default::default()
|
||||||
..ReadParams::default()
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
assert!(!wrapper.called());
|
assert!(!wrapper.called());
|
||||||
let _ = Table::open_with_params(uri, "test", param).await.unwrap();
|
let _ = Table::open_with_params(uri, "test", &param).await.unwrap();
|
||||||
assert!(wrapper.called());
|
assert!(wrapper.called());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user