mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 21:39:57 +00:00
Compare commits
13 Commits
python-v0.
...
api-docs-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1023a5754b | ||
|
|
7808f28ec7 | ||
|
|
157fb9ea72 | ||
|
|
7a5e65d437 | ||
|
|
5228ca4b6b | ||
|
|
dcc216a244 | ||
|
|
a7aa168c7f | ||
|
|
7a89b5ec68 | ||
|
|
ee862abd29 | ||
|
|
4e1ed2b139 | ||
|
|
008e0b1a93 | ||
|
|
82cbcf6d07 | ||
|
|
1cd5426aea |
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.4.3
|
||||
current_version = 0.4.4
|
||||
commit = True
|
||||
message = Bump version: {current_version} → {new_version}
|
||||
tag = True
|
||||
|
||||
1
.github/workflows/docs.yml
vendored
1
.github/workflows/docs.yml
vendored
@@ -61,6 +61,7 @@ jobs:
|
||||
working-directory: node
|
||||
run: |
|
||||
npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
|
||||
cp ../docs/src/javascript.md ../docs/src/javascript/javascript.md
|
||||
- name: Build docs
|
||||
run: |
|
||||
PYTHONPATH=. mkdocs build -f docs/mkdocs.yml
|
||||
|
||||
114
.github/workflows/nodejs.yml
vendored
Normal file
114
.github/workflows/nodejs.yml
vendored
Normal file
@@ -0,0 +1,114 @@
|
||||
name: NodeJS (NAPI)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
paths:
|
||||
- nodejs/**
|
||||
- .github/workflows/nodejs.yml
|
||||
- docker-compose.yml
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||
# "1" means line tables only, which is useful for panic tracebacks.
|
||||
RUSTFLAGS: "-C debuginfo=1"
|
||||
RUST_BACKTRACE: "1"
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: Lint
|
||||
runs-on: ubuntu-22.04
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: nodejs
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Lint
|
||||
run: |
|
||||
cargo fmt --all -- --check
|
||||
cargo clippy --all --all-features -- -D warnings
|
||||
npm ci
|
||||
npm run lint
|
||||
linux:
|
||||
name: Linux (NodeJS ${{ matrix.node-version }})
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
matrix:
|
||||
node-version: [ "18", "20" ]
|
||||
runs-on: "ubuntu-22.04"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: nodejs
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
cache: 'npm'
|
||||
cache-dependency-path: node/package-lock.json
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
npm install -g @napi-rs/cli
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci
|
||||
npm run build
|
||||
- name: Test
|
||||
run: npm run test
|
||||
macos:
|
||||
timeout-minutes: 30
|
||||
runs-on: "macos-13"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: nodejs
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: node/package-lock.json
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
brew install protobuf
|
||||
npm install -g @napi-rs/cli
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci
|
||||
npm run build
|
||||
- name: Test
|
||||
run: |
|
||||
npm run test
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -29,6 +29,7 @@ python/dist
|
||||
node/dist
|
||||
node/examples/**/package-lock.json
|
||||
node/examples/**/dist
|
||||
dist
|
||||
|
||||
## Rust
|
||||
target
|
||||
|
||||
19
Cargo.toml
19
Cargo.toml
@@ -1,14 +1,20 @@
|
||||
[workspace]
|
||||
members = ["rust/ffi/node", "rust/vectordb"]
|
||||
members = ["rust/ffi/node", "rust/vectordb", "nodejs"]
|
||||
# Python package needs to be built by maturin.
|
||||
exclude = ["python"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
edition = "2021"
|
||||
authors = ["Lance Devs <dev@lancedb.com>"]
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/lancedb/lancedb"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.9.7", "features" = ["dynamodb"] }
|
||||
lance-index = { "version" = "=0.9.7" }
|
||||
lance-linalg = { "version" = "=0.9.7" }
|
||||
lance-testing = { "version" = "=0.9.7" }
|
||||
lance = { "version" = "=0.9.9", "features" = ["dynamodb"] }
|
||||
lance-index = { "version" = "=0.9.9" }
|
||||
lance-linalg = { "version" = "=0.9.9" }
|
||||
lance-testing = { "version" = "=0.9.9" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "49.0.0", optional = false }
|
||||
arrow-array = "49.0"
|
||||
@@ -18,11 +24,14 @@ arrow-ord = "49.0"
|
||||
arrow-schema = "49.0"
|
||||
arrow-arith = "49.0"
|
||||
arrow-cast = "49.0"
|
||||
async-trait = "0"
|
||||
chrono = "0.4.23"
|
||||
half = { "version" = "=2.3.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
] }
|
||||
futures = "0"
|
||||
log = "0.4"
|
||||
object_store = "0.9.0"
|
||||
snafu = "0.7.4"
|
||||
url = "2"
|
||||
num-traits = "0.2"
|
||||
|
||||
@@ -90,6 +90,7 @@ nav:
|
||||
- Full-text search: fts.md
|
||||
- Filtering: sql.md
|
||||
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
|
||||
- Configuring Storage: guides/storage.md
|
||||
- 🧬 Managing embeddings:
|
||||
- Overview: embeddings/index.md
|
||||
- Explicit management: embeddings/embedding_explicit.md
|
||||
@@ -128,12 +129,9 @@ nav:
|
||||
- 💭 FAQs: faq.md
|
||||
- ⚙️ API reference:
|
||||
- 🐍 Python: python/python.md
|
||||
- 👾 JavaScript: javascript/modules.md
|
||||
- 👾 JavaScript: javascript/javascript.md
|
||||
- ☁️ LanceDB Cloud:
|
||||
- Overview: cloud/index.md
|
||||
- API reference:
|
||||
- 🐍 Python: python/saas-python.md
|
||||
- 👾 JavaScript: javascript/saas-modules.md
|
||||
|
||||
|
||||
- Quick start: basic.md
|
||||
@@ -149,6 +147,7 @@ nav:
|
||||
- Full-text search: fts.md
|
||||
- Filtering: sql.md
|
||||
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
|
||||
- Configuring Storage: guides/storage.md
|
||||
- Managing Embeddings:
|
||||
- Overview: embeddings/index.md
|
||||
- Explicit management: embeddings/embedding_explicit.md
|
||||
@@ -182,12 +181,9 @@ nav:
|
||||
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
|
||||
- API reference:
|
||||
- Python: python/python.md
|
||||
- Javascript: javascript/modules.md
|
||||
- Javascript: javascript/javascript.md
|
||||
- LanceDB Cloud:
|
||||
- Overview: cloud/index.md
|
||||
- API reference:
|
||||
- 🐍 Python: python/saas-python.md
|
||||
- 👾 JavaScript: javascript/saas-modules.md
|
||||
|
||||
extra_css:
|
||||
- styles/global.css
|
||||
|
||||
91
docs/src/guides/storage.md
Normal file
91
docs/src/guides/storage.md
Normal file
@@ -0,0 +1,91 @@
|
||||
# Configuring cloud storage
|
||||
|
||||
<!-- TODO: When we add documentation for how to configure other storage types
|
||||
we can change the name to a more general "Configuring storage" -->
|
||||
|
||||
When using LanceDB OSS, you can choose where to store your data. The tradeoffs between different storage options are discussed in the [storage concepts guide](../concepts/storage.md). This guide shows how to configure LanceDB to use different storage options.
|
||||
|
||||
## Object Stores
|
||||
|
||||
LanceDB OSS supports object stores such as AWS S3 (and compatible stores), Azure Blob Storage, and Google Cloud Storage. Which object store to use is determined by the URI scheme of the dataset path. `s3://` is used for AWS S3, `az://` is used for Azure Blob Storage, and `gs://` is used for Google Cloud Storage. These URIs are passed to the `connect` function:
|
||||
|
||||
=== "Python"
|
||||
|
||||
AWS S3:
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
db = lancedb.connect("s3://bucket/path")
|
||||
```
|
||||
|
||||
Google Cloud Storage:
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
db = lancedb.connect("gs://bucket/path")
|
||||
```
|
||||
|
||||
Azure Blob Storage:
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
db = lancedb.connect("az://bucket/path")
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
|
||||
AWS S3:
|
||||
|
||||
```javascript
|
||||
const lancedb = require("vectordb");
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
```
|
||||
|
||||
Google Cloud Storage:
|
||||
|
||||
```javascript
|
||||
const lancedb = require("vectordb");
|
||||
const db = await lancedb.connect("gs://bucket/path");
|
||||
```
|
||||
|
||||
Azure Blob Storage:
|
||||
|
||||
```javascript
|
||||
const lancedb = require("vectordb");
|
||||
const db = await lancedb.connect("az://bucket/path");
|
||||
```
|
||||
|
||||
In most cases, when running in the respective cloud and permissions are set up correctly, no additional configuration is required. When running outside of the respective cloud, authentication credentials must be provided using environment variables. In general, these environment variables are the same as those used by the respective cloud SDKs. The sections below describe the environment variables that can be used to configure each object store.
|
||||
|
||||
LanceDB OSS uses the [object-store](https://docs.rs/object_store/latest/object_store/) Rust crate for object store access. There are general environment variables that can be used to configure the object store, such as the request timeout and proxy configuration. See the [object_store ClientConfigKey](https://docs.rs/object_store/latest/object_store/enum.ClientConfigKey.html) doc for available configuration options. The environment variables that can be set are the snake-cased versions of these variable names. For example, to set `ProxyUrl` use the environment variable `PROXY_URL`. (Don't let the Rust docs intimidate you! We link to them so you can see an up-to-date list of the available options.)
|
||||
|
||||
|
||||
### AWS S3
|
||||
|
||||
To configure credentials for AWS S3, you can use the `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` environment variables.
|
||||
|
||||
Alternatively, if you are using AWS SSO, you can use the `AWS_PROFILE` and `AWS_DEFAULT_REGION` environment variables.
|
||||
|
||||
You can see a full list of environment variables [here](https://docs.rs/object_store/latest/object_store/aws/struct.AmazonS3Builder.html#method.from_env).
|
||||
|
||||
#### S3-compatible stores
|
||||
|
||||
LanceDB can also connect to S3-compatible stores, such as MinIO. To do so, you must specify two environment variables: `AWS_ENDPOINT` and `AWS_DEFAULT_REGION`. `AWS_ENDPOINT` should be the URL of the S3-compatible store, and `AWS_DEFAULT_REGION` should be the region to use.
|
||||
|
||||
<!-- TODO: we should also document the use of S3 Express once we fully support it -->
|
||||
|
||||
### Google Cloud Storage
|
||||
|
||||
GCS credentials are configured by setting the `GOOGLE_SERVICE_ACCOUNT` environment variable to the path of a JSON file containing the service account credentials. There are several aliases for this environment variable, documented [here](https://docs.rs/object_store/latest/object_store/gcp/struct.GoogleCloudStorageBuilder.html#method.from_env).
|
||||
|
||||
|
||||
!!! info "HTTP/2 support"
|
||||
|
||||
By default, GCS uses HTTP/1 for communication, as opposed to HTTP/2. This improves maximum throughput significantly. However, if you wish to use HTTP/2 for some reason, you can set the environment variable `HTTP1_ONLY` to `false`.
|
||||
|
||||
### Azure Blob Storage
|
||||
|
||||
Azure Blob Storage credentials can be configured by setting the `AZURE_STORAGE_ACCOUNT_NAME` and `AZURE_STORAGE_ACCOUNT_KEY` environment variables. The full list of environment variables that can be set is documented [here](https://docs.rs/object_store/latest/object_store/azure/struct.MicrosoftAzureBuilder.html#method.from_env).
|
||||
|
||||
|
||||
<!-- TODO: demonstrate how to configure networked file systems for optimal performance -->
|
||||
62
docs/src/javascript.md
Normal file
62
docs/src/javascript.md
Normal file
@@ -0,0 +1,62 @@
|
||||
# Javascript API Reference
|
||||
|
||||
This section contains the API reference for the LanceDB JavaScript API.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
npm install vectordb
|
||||
```
|
||||
|
||||
This will download the appropriate native library for your platform. We currently
|
||||
support:
|
||||
|
||||
* Linux (x86_64 and aarch64)
|
||||
* MacOS (Intel and ARM/M1/M2)
|
||||
* Windows (x86_64 only)
|
||||
|
||||
We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Example
|
||||
Connect to a local directory
|
||||
```javascript
|
||||
const lancedb = require('vectordb');
|
||||
//connect to a local database
|
||||
const db = await lancedb.connect('data/sample-lancedb');
|
||||
```
|
||||
Connect to LanceDB Cloud
|
||||
```javascript
|
||||
// connect to LanceDB Cloud
|
||||
const db = await lancedb.connect({
|
||||
uri: "db://my-database",
|
||||
apiKey: "sk_...",
|
||||
region: "us-east-1"
|
||||
});
|
||||
```
|
||||
Create a table followed by a search
|
||||
```javascript
|
||||
const table = await db.createTable("my_table",
|
||||
[{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
|
||||
{ id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
|
||||
const results = await table.search([0.1, 0.3]).limit(20).execute();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [examples](./examples) folder contains complete examples.
|
||||
|
||||
## Table of contents
|
||||
### Connection
|
||||
Connect to a LanceDB database.
|
||||
|
||||
- [Connection](interfaces/Connection.md)
|
||||
### Table
|
||||
A Table is a collection of Records in a LanceDB Database.
|
||||
|
||||
- [Table](interfaces/Table.md)
|
||||
### Query
|
||||
The LanceDB Query
|
||||
|
||||
- [Query](classes/Query.md)
|
||||
|
||||
@@ -38,4 +38,4 @@ A [WriteMode](../enums/WriteMode.md) to use on this operation
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:1019](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1019)
|
||||
[index.ts:1070](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1070)
|
||||
|
||||
@@ -46,7 +46,7 @@ A connection to a LanceDB database.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:489](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L489)
|
||||
[index.ts:496](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L496)
|
||||
|
||||
## Properties
|
||||
|
||||
@@ -56,7 +56,7 @@ A connection to a LanceDB database.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:487](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L487)
|
||||
[index.ts:494](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L494)
|
||||
|
||||
___
|
||||
|
||||
@@ -74,7 +74,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:486](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L486)
|
||||
[index.ts:493](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L493)
|
||||
|
||||
## Accessors
|
||||
|
||||
@@ -92,7 +92,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:494](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L494)
|
||||
[index.ts:501](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L501)
|
||||
|
||||
## Methods
|
||||
|
||||
@@ -113,7 +113,7 @@ Creates a new Table, optionally initializing it with new data.
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `name` | `string` \| [`CreateTableOptions`](../interfaces/CreateTableOptions.md)\<`T`\> |
|
||||
| `data?` | `Record`\<`string`, `unknown`\>[] |
|
||||
| `data?` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] |
|
||||
| `optsOrEmbedding?` | [`WriteOptions`](../interfaces/WriteOptions.md) \| [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)\<`T`\> |
|
||||
| `opt?` | [`WriteOptions`](../interfaces/WriteOptions.md) |
|
||||
|
||||
@@ -127,7 +127,7 @@ Creates a new Table, optionally initializing it with new data.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:542](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L542)
|
||||
[index.ts:549](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L549)
|
||||
|
||||
___
|
||||
|
||||
@@ -158,7 +158,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:576](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L576)
|
||||
[index.ts:583](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L583)
|
||||
|
||||
___
|
||||
|
||||
@@ -184,7 +184,7 @@ Drop an existing table.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:630](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L630)
|
||||
[index.ts:637](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L637)
|
||||
|
||||
___
|
||||
|
||||
@@ -210,7 +210,7 @@ Open a table in the database.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:510](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L510)
|
||||
[index.ts:517](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L517)
|
||||
|
||||
▸ **openTable**\<`T`\>(`name`, `embeddings`): `Promise`\<[`Table`](../interfaces/Table.md)\<`T`\>\>
|
||||
|
||||
@@ -239,7 +239,7 @@ Connection.openTable
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:518](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L518)
|
||||
[index.ts:525](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L525)
|
||||
|
||||
▸ **openTable**\<`T`\>(`name`, `embeddings?`): `Promise`\<[`Table`](../interfaces/Table.md)\<`T`\>\>
|
||||
|
||||
@@ -266,7 +266,7 @@ Connection.openTable
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:522](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L522)
|
||||
[index.ts:529](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L529)
|
||||
|
||||
___
|
||||
|
||||
@@ -286,4 +286,4 @@ Get the names of all tables in the database.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:501](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L501)
|
||||
[index.ts:508](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L508)
|
||||
|
||||
@@ -74,7 +74,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:642](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L642)
|
||||
[index.ts:649](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L649)
|
||||
|
||||
• **new LocalTable**\<`T`\>(`tbl`, `name`, `options`, `embeddings`)
|
||||
|
||||
@@ -95,7 +95,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:649](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L649)
|
||||
[index.ts:656](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L656)
|
||||
|
||||
## Properties
|
||||
|
||||
@@ -105,7 +105,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:639](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L639)
|
||||
[index.ts:646](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L646)
|
||||
|
||||
___
|
||||
|
||||
@@ -115,7 +115,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:638](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L638)
|
||||
[index.ts:645](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L645)
|
||||
|
||||
___
|
||||
|
||||
@@ -125,7 +125,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:637](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L637)
|
||||
[index.ts:644](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L644)
|
||||
|
||||
___
|
||||
|
||||
@@ -143,7 +143,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:640](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L640)
|
||||
[index.ts:647](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L647)
|
||||
|
||||
___
|
||||
|
||||
@@ -153,7 +153,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:636](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L636)
|
||||
[index.ts:643](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L643)
|
||||
|
||||
___
|
||||
|
||||
@@ -179,7 +179,7 @@ Creates a filter query to find all rows matching the specified criteria
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:688](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L688)
|
||||
[index.ts:695](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L695)
|
||||
|
||||
## Accessors
|
||||
|
||||
@@ -197,7 +197,7 @@ Creates a filter query to find all rows matching the specified criteria
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:668](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L668)
|
||||
[index.ts:675](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L675)
|
||||
|
||||
___
|
||||
|
||||
@@ -215,7 +215,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:849](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L849)
|
||||
[index.ts:875](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L875)
|
||||
|
||||
## Methods
|
||||
|
||||
@@ -229,7 +229,7 @@ Insert records into this Table.
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -243,7 +243,7 @@ The number of rows added to the table
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:696](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L696)
|
||||
[index.ts:703](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L703)
|
||||
|
||||
___
|
||||
|
||||
@@ -257,7 +257,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:861](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L861)
|
||||
[index.ts:887](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L887)
|
||||
|
||||
___
|
||||
|
||||
@@ -267,6 +267,8 @@ ___
|
||||
|
||||
Clean up old versions of the table, freeing disk space.
|
||||
|
||||
Note: this API is not yet available on LanceDB Cloud
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
@@ -280,7 +282,7 @@ Clean up old versions of the table, freeing disk space.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:808](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L808)
|
||||
[index.ts:833](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L833)
|
||||
|
||||
___
|
||||
|
||||
@@ -293,6 +295,8 @@ Run the compaction process on the table.
|
||||
This can be run after making several small appends to optimize the table
|
||||
for faster reads.
|
||||
|
||||
Note: this API is not yet available on LanceDB Cloud
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
@@ -307,7 +311,7 @@ Metrics about the compaction operation.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:831](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L831)
|
||||
[index.ts:857](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L857)
|
||||
|
||||
___
|
||||
|
||||
@@ -327,7 +331,7 @@ Returns the number of rows in this table.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:749](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L749)
|
||||
[index.ts:773](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L773)
|
||||
|
||||
___
|
||||
|
||||
@@ -357,7 +361,7 @@ VectorIndexParams.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:734](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L734)
|
||||
[index.ts:758](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L758)
|
||||
|
||||
___
|
||||
|
||||
@@ -392,7 +396,7 @@ await table.createScalarIndex('my_col')
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:742](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L742)
|
||||
[index.ts:766](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L766)
|
||||
|
||||
___
|
||||
|
||||
@@ -418,7 +422,7 @@ Delete rows from this table.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:758](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L758)
|
||||
[index.ts:782](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L782)
|
||||
|
||||
___
|
||||
|
||||
@@ -440,7 +444,7 @@ Creates a filter query to find all rows matching the specified criteria
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:684](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L684)
|
||||
[index.ts:691](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L691)
|
||||
|
||||
___
|
||||
|
||||
@@ -454,7 +458,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:854](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L854)
|
||||
[index.ts:880](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L880)
|
||||
|
||||
___
|
||||
|
||||
@@ -480,7 +484,7 @@ Get statistics about an index.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:845](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L845)
|
||||
[index.ts:871](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L871)
|
||||
|
||||
___
|
||||
|
||||
@@ -500,7 +504,7 @@ List the indicies on this table.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:841](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L841)
|
||||
[index.ts:867](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L867)
|
||||
|
||||
___
|
||||
|
||||
@@ -514,7 +518,7 @@ Insert records into this Table, replacing its contents.
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table Type Table is ArrowTable |
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -528,7 +532,7 @@ The number of rows added to the table
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:716](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L716)
|
||||
[index.ts:732](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L732)
|
||||
|
||||
___
|
||||
|
||||
@@ -554,7 +558,7 @@ Creates a search query to find the nearest neighbors of the given search term
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:676](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L676)
|
||||
[index.ts:683](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L683)
|
||||
|
||||
___
|
||||
|
||||
@@ -580,4 +584,4 @@ Update rows in this table.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:771](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L771)
|
||||
[index.ts:795](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L795)
|
||||
|
||||
56
docs/src/javascript/classes/MakeArrowTableOptions.md
Normal file
56
docs/src/javascript/classes/MakeArrowTableOptions.md
Normal file
@@ -0,0 +1,56 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / MakeArrowTableOptions
|
||||
|
||||
# Class: MakeArrowTableOptions
|
||||
|
||||
Options to control the makeArrowTable call.
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Constructors
|
||||
|
||||
- [constructor](MakeArrowTableOptions.md#constructor)
|
||||
|
||||
### Properties
|
||||
|
||||
- [schema](MakeArrowTableOptions.md#schema)
|
||||
- [vectorColumns](MakeArrowTableOptions.md#vectorcolumns)
|
||||
|
||||
## Constructors
|
||||
|
||||
### constructor
|
||||
|
||||
• **new MakeArrowTableOptions**(`values?`)
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `values?` | `Partial`\<[`MakeArrowTableOptions`](MakeArrowTableOptions.md)\> |
|
||||
|
||||
#### Defined in
|
||||
|
||||
[arrow.ts:56](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/arrow.ts#L56)
|
||||
|
||||
## Properties
|
||||
|
||||
### schema
|
||||
|
||||
• `Optional` **schema**: `Schema`\<`any`\>
|
||||
|
||||
Provided schema.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[arrow.ts:49](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/arrow.ts#L49)
|
||||
|
||||
___
|
||||
|
||||
### vectorColumns
|
||||
|
||||
• **vectorColumns**: `Record`\<`string`, `VectorColumnOptions`\>
|
||||
|
||||
Vector columns
|
||||
|
||||
#### Defined in
|
||||
|
||||
[arrow.ts:52](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/arrow.ts#L52)
|
||||
@@ -40,7 +40,7 @@ An embedding function that automatically creates vector representation for a giv
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/openai.ts:21](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/openai.ts#L21)
|
||||
[embedding/openai.ts:22](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/embedding/openai.ts#L22)
|
||||
|
||||
## Properties
|
||||
|
||||
@@ -50,17 +50,17 @@ An embedding function that automatically creates vector representation for a giv
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/openai.ts:19](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/openai.ts#L19)
|
||||
[embedding/openai.ts:20](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/embedding/openai.ts#L20)
|
||||
|
||||
___
|
||||
|
||||
### \_openai
|
||||
|
||||
• `Private` `Readonly` **\_openai**: `any`
|
||||
• `Private` `Readonly` **\_openai**: `OpenAI`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/openai.ts:18](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/openai.ts#L18)
|
||||
[embedding/openai.ts:19](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/embedding/openai.ts#L19)
|
||||
|
||||
___
|
||||
|
||||
@@ -76,7 +76,7 @@ The name of the column that will be used as input for the Embedding Function.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/openai.ts:50](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/openai.ts#L50)
|
||||
[embedding/openai.ts:56](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/embedding/openai.ts#L56)
|
||||
|
||||
## Methods
|
||||
|
||||
@@ -102,4 +102,4 @@ Creates a vector representation for the given values.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/openai.ts:38](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/openai.ts#L38)
|
||||
[embedding/openai.ts:43](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/embedding/openai.ts#L43)
|
||||
|
||||
@@ -65,7 +65,7 @@ A builder for nearest neighbor queries for LanceDB.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:38](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L38)
|
||||
[query.ts:38](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L38)
|
||||
|
||||
## Properties
|
||||
|
||||
@@ -75,7 +75,7 @@ A builder for nearest neighbor queries for LanceDB.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:36](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L36)
|
||||
[query.ts:36](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L36)
|
||||
|
||||
___
|
||||
|
||||
@@ -85,7 +85,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:33](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L33)
|
||||
[query.ts:33](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L33)
|
||||
|
||||
___
|
||||
|
||||
@@ -95,7 +95,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:29](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L29)
|
||||
[query.ts:29](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L29)
|
||||
|
||||
___
|
||||
|
||||
@@ -105,7 +105,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:34](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L34)
|
||||
[query.ts:34](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L34)
|
||||
|
||||
___
|
||||
|
||||
@@ -115,7 +115,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:31](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L31)
|
||||
[query.ts:31](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L31)
|
||||
|
||||
___
|
||||
|
||||
@@ -125,7 +125,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:35](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L35)
|
||||
[query.ts:35](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L35)
|
||||
|
||||
___
|
||||
|
||||
@@ -135,7 +135,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:26](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L26)
|
||||
[query.ts:26](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L26)
|
||||
|
||||
___
|
||||
|
||||
@@ -145,7 +145,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:28](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L28)
|
||||
[query.ts:28](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L28)
|
||||
|
||||
___
|
||||
|
||||
@@ -155,7 +155,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:30](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L30)
|
||||
[query.ts:30](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L30)
|
||||
|
||||
___
|
||||
|
||||
@@ -165,7 +165,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:32](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L32)
|
||||
[query.ts:32](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L32)
|
||||
|
||||
___
|
||||
|
||||
@@ -175,7 +175,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:27](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L27)
|
||||
[query.ts:27](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L27)
|
||||
|
||||
___
|
||||
|
||||
@@ -201,7 +201,7 @@ A filter statement to be applied to this query.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:87](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L87)
|
||||
[query.ts:87](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L87)
|
||||
|
||||
## Methods
|
||||
|
||||
@@ -223,7 +223,7 @@ Execute the query and return the results as an Array of Objects
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:115](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L115)
|
||||
[query.ts:115](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L115)
|
||||
|
||||
___
|
||||
|
||||
@@ -245,7 +245,7 @@ A filter statement to be applied to this query.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:82](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L82)
|
||||
[query.ts:82](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L82)
|
||||
|
||||
___
|
||||
|
||||
@@ -259,7 +259,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:142](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L142)
|
||||
[query.ts:143](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L143)
|
||||
|
||||
___
|
||||
|
||||
@@ -281,7 +281,7 @@ Sets the number of results that will be returned
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:55](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L55)
|
||||
[query.ts:55](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L55)
|
||||
|
||||
___
|
||||
|
||||
@@ -307,7 +307,7 @@ MetricType for the different options
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:102](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L102)
|
||||
[query.ts:102](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L102)
|
||||
|
||||
___
|
||||
|
||||
@@ -329,7 +329,7 @@ The number of probes used. A higher number makes search more accurate but also s
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:73](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L73)
|
||||
[query.ts:73](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L73)
|
||||
|
||||
___
|
||||
|
||||
@@ -349,7 +349,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:107](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L107)
|
||||
[query.ts:107](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L107)
|
||||
|
||||
___
|
||||
|
||||
@@ -371,7 +371,7 @@ Refine the results by reading extra elements and re-ranking them in memory.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:64](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L64)
|
||||
[query.ts:64](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L64)
|
||||
|
||||
___
|
||||
|
||||
@@ -393,4 +393,4 @@ Return only the specified columns.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[query.ts:93](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L93)
|
||||
[query.ts:93](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/query.ts#L93)
|
||||
|
||||
224
docs/src/javascript/classes/RemoteConnection.md
Normal file
224
docs/src/javascript/classes/RemoteConnection.md
Normal file
@@ -0,0 +1,224 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / RemoteConnection
|
||||
|
||||
# Class: RemoteConnection
|
||||
|
||||
Remote connection.
|
||||
|
||||
## Implements
|
||||
|
||||
- [`Connection`](../interfaces/Connection.md)
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Constructors
|
||||
|
||||
- [constructor](RemoteConnection.md#constructor)
|
||||
|
||||
### Properties
|
||||
|
||||
- [\_client](RemoteConnection.md#_client)
|
||||
- [\_dbName](RemoteConnection.md#_dbname)
|
||||
|
||||
### Accessors
|
||||
|
||||
- [uri](RemoteConnection.md#uri)
|
||||
|
||||
### Methods
|
||||
|
||||
- [createTable](RemoteConnection.md#createtable)
|
||||
- [dropTable](RemoteConnection.md#droptable)
|
||||
- [openTable](RemoteConnection.md#opentable)
|
||||
- [tableNames](RemoteConnection.md#tablenames)
|
||||
|
||||
## Constructors
|
||||
|
||||
### constructor
|
||||
|
||||
• **new RemoteConnection**(`opts`)
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `opts` | [`ConnectionOptions`](../interfaces/ConnectionOptions.md) |
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:48](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L48)
|
||||
|
||||
## Properties
|
||||
|
||||
### \_client
|
||||
|
||||
• `Private` `Readonly` **\_client**: `HttpLancedbClient`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:45](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L45)
|
||||
|
||||
___
|
||||
|
||||
### \_dbName
|
||||
|
||||
• `Private` `Readonly` **\_dbName**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:46](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L46)
|
||||
|
||||
## Accessors
|
||||
|
||||
### uri
|
||||
|
||||
• `get` **uri**(): `string`
|
||||
|
||||
#### Returns
|
||||
|
||||
`string`
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[uri](../interfaces/Connection.md#uri)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:75](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L75)
|
||||
|
||||
## Methods
|
||||
|
||||
### createTable
|
||||
|
||||
▸ **createTable**\<`T`\>(`nameOrOpts`, `data?`, `optsOrEmbedding?`, `opt?`): `Promise`\<[`Table`](../interfaces/Table.md)\<`T`\>\>
|
||||
|
||||
Creates a new Table, optionally initializing it with new data.
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name |
|
||||
| :------ |
|
||||
| `T` |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `nameOrOpts` | `string` \| [`CreateTableOptions`](../interfaces/CreateTableOptions.md)\<`T`\> |
|
||||
| `data?` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] |
|
||||
| `optsOrEmbedding?` | [`WriteOptions`](../interfaces/WriteOptions.md) \| [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)\<`T`\> |
|
||||
| `opt?` | [`WriteOptions`](../interfaces/WriteOptions.md) |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<[`Table`](../interfaces/Table.md)\<`T`\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[createTable](../interfaces/Connection.md#createtable)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:107](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L107)
|
||||
|
||||
___
|
||||
|
||||
### dropTable
|
||||
|
||||
▸ **dropTable**(`name`): `Promise`\<`void`\>
|
||||
|
||||
Drop an existing table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table to drop. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<`void`\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[dropTable](../interfaces/Connection.md#droptable)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:175](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L175)
|
||||
|
||||
___
|
||||
|
||||
### openTable
|
||||
|
||||
▸ **openTable**(`name`): `Promise`\<[`Table`](../interfaces/Table.md)\<`number`[]\>\>
|
||||
|
||||
Open a table in the database.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<[`Table`](../interfaces/Table.md)\<`number`[]\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[openTable](../interfaces/Connection.md#opentable)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:91](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L91)
|
||||
|
||||
▸ **openTable**\<`T`\>(`name`, `embeddings`): `Promise`\<[`Table`](../interfaces/Table.md)\<`T`\>\>
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name |
|
||||
| :------ |
|
||||
| `T` |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `name` | `string` |
|
||||
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)\<`T`\> |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<[`Table`](../interfaces/Table.md)\<`T`\>\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
Connection.openTable
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:92](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L92)
|
||||
|
||||
___
|
||||
|
||||
### tableNames
|
||||
|
||||
▸ **tableNames**(`pageToken?`, `limit?`): `Promise`\<`string`[]\>
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Default value |
|
||||
| :------ | :------ | :------ |
|
||||
| `pageToken` | `string` | `''` |
|
||||
| `limit` | `number` | `10` |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<`string`[]\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Connection](../interfaces/Connection.md).[tableNames](../interfaces/Connection.md#tablenames)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:80](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L80)
|
||||
470
docs/src/javascript/classes/RemoteTable.md
Normal file
470
docs/src/javascript/classes/RemoteTable.md
Normal file
@@ -0,0 +1,470 @@
|
||||
[vectordb](../README.md) / [Exports](../modules.md) / RemoteTable
|
||||
|
||||
# Class: RemoteTable\<T\>
|
||||
|
||||
A LanceDB Table is a collection of Records. Each Record has one or more vector fields.
|
||||
|
||||
## Type parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `T` | `number`[] |
|
||||
|
||||
## Implements
|
||||
|
||||
- [`Table`](../interfaces/Table.md)\<`T`\>
|
||||
|
||||
## Table of contents
|
||||
|
||||
### Constructors
|
||||
|
||||
- [constructor](RemoteTable.md#constructor)
|
||||
|
||||
### Properties
|
||||
|
||||
- [\_client](RemoteTable.md#_client)
|
||||
- [\_embeddings](RemoteTable.md#_embeddings)
|
||||
- [\_name](RemoteTable.md#_name)
|
||||
|
||||
### Accessors
|
||||
|
||||
- [name](RemoteTable.md#name)
|
||||
- [schema](RemoteTable.md#schema)
|
||||
|
||||
### Methods
|
||||
|
||||
- [add](RemoteTable.md#add)
|
||||
- [countRows](RemoteTable.md#countrows)
|
||||
- [createIndex](RemoteTable.md#createindex)
|
||||
- [createScalarIndex](RemoteTable.md#createscalarindex)
|
||||
- [delete](RemoteTable.md#delete)
|
||||
- [indexStats](RemoteTable.md#indexstats)
|
||||
- [listIndices](RemoteTable.md#listindices)
|
||||
- [overwrite](RemoteTable.md#overwrite)
|
||||
- [search](RemoteTable.md#search)
|
||||
- [update](RemoteTable.md#update)
|
||||
|
||||
## Constructors
|
||||
|
||||
### constructor
|
||||
|
||||
• **new RemoteTable**\<`T`\>(`client`, `name`)
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `T` | `number`[] |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `client` | `HttpLancedbClient` |
|
||||
| `name` | `string` |
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:234](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L234)
|
||||
|
||||
• **new RemoteTable**\<`T`\>(`client`, `name`, `embeddings`)
|
||||
|
||||
#### Type parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `T` | `number`[] |
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `client` | `HttpLancedbClient` |
|
||||
| `name` | `string` |
|
||||
| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)\<`T`\> |
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:235](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L235)
|
||||
|
||||
## Properties
|
||||
|
||||
### \_client
|
||||
|
||||
• `Private` `Readonly` **\_client**: `HttpLancedbClient`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:230](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L230)
|
||||
|
||||
___
|
||||
|
||||
### \_embeddings
|
||||
|
||||
• `Private` `Optional` `Readonly` **\_embeddings**: [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)\<`T`\>
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:231](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L231)
|
||||
|
||||
___
|
||||
|
||||
### \_name
|
||||
|
||||
• `Private` `Readonly` **\_name**: `string`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:232](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L232)
|
||||
|
||||
## Accessors
|
||||
|
||||
### name
|
||||
|
||||
• `get` **name**(): `string`
|
||||
|
||||
#### Returns
|
||||
|
||||
`string`
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[name](../interfaces/Table.md#name)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:250](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L250)
|
||||
|
||||
___
|
||||
|
||||
### schema
|
||||
|
||||
• `get` **schema**(): `Promise`\<`any`\>
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<`any`\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[schema](../interfaces/Table.md#schema)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:254](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L254)
|
||||
|
||||
## Methods
|
||||
|
||||
### add
|
||||
|
||||
▸ **add**(`data`): `Promise`\<`number`\>
|
||||
|
||||
Insert records into this Table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<`number`\>
|
||||
|
||||
The number of rows added to the table
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[add](../interfaces/Table.md#add)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:273](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L273)
|
||||
|
||||
___
|
||||
|
||||
### countRows
|
||||
|
||||
▸ **countRows**(): `Promise`\<`number`\>
|
||||
|
||||
Returns the number of rows in this table.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<`number`\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[countRows](../interfaces/Table.md#countrows)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:372](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L372)
|
||||
|
||||
___
|
||||
|
||||
### createIndex
|
||||
|
||||
▸ **createIndex**(`indexParams`): `Promise`\<`void`\>
|
||||
|
||||
Create an ANN (approximate nearest neighbor) vector index on this Table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `indexParams` | [`IvfPQIndexConfig`](../interfaces/IvfPQIndexConfig.md) | The parameters of this Index. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<`void`\>
|
||||
|
||||
**`See`**
|
||||
|
||||
VectorIndexParams.
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[createIndex](../interfaces/Table.md#createindex)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:326](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L326)
|
||||
|
||||
___
|
||||
|
||||
### createScalarIndex
|
||||
|
||||
▸ **createScalarIndex**(`column`, `replace`): `Promise`\<`void`\>
|
||||
|
||||
Create a scalar index on this Table for the given column
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `column` | `string` | The column to index |
|
||||
| `replace` | `boolean` | If false, fail if an index already exists on the column. Scalar indices, like vector indices, can be used to speed up scans. A scalar index can speed up scans that contain filter expressions on the indexed column. For example, the following scan will be faster if the column `my_col` has a scalar index: ```ts const con = await lancedb.connect('./.lancedb'); const table = await con.openTable('images'); const results = await table.where('my_col = 7').execute(); ``` Scalar indices can also speed up scans containing a vector search and a prefilter: ```ts const con = await lancedb.connect('./.lancedb'); const table = await con.openTable('images'); const results = await table.search([1.0, 2.0]).where('my_col != 7').prefilter(true); ``` Scalar indices can only speed up scans for basic filters using equality, comparison, range (e.g. `my_col BETWEEN 0 AND 100`), and set membership (e.g. `my_col IN (0, 1, 2)`). Scalar indices can be used if the filter contains multiple indexed columns and the filter criteria are AND'd or OR'd together (e.g. `my_col < 0 AND other_col > 100`). Scalar indices may be used if the filter contains non-indexed columns but, depending on the structure of the filter, they may not be usable. For example, if the column `not_indexed` does not have a scalar index then the filter `my_col = 0 OR not_indexed = 1` will not be able to use any scalar index on `my_col`. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<`void`\>
|
||||
|
||||
**`Examples`**
|
||||
|
||||
```ts
|
||||
const con = await lancedb.connect('./.lancedb')
|
||||
const table = await con.openTable('images')
|
||||
await table.createScalarIndex('my_col')
|
||||
```
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[createScalarIndex](../interfaces/Table.md#createscalarindex)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:368](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L368)
|
||||
|
||||
___
|
||||
|
||||
### delete
|
||||
|
||||
▸ **delete**(`filter`): `Promise`\<`void`\>
|
||||
|
||||
Delete rows from this table.
|
||||
|
||||
This can be used to delete a single row, many rows, all rows, or
|
||||
sometimes no rows (if your predicate matches nothing).
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `filter` | `string` | A filter in the same format used by a SQL WHERE clause. The filter must not be empty. |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<`void`\>
|
||||
|
||||
**`Examples`**
|
||||
|
||||
```ts
|
||||
const con = await lancedb.connect("./.lancedb")
|
||||
const data = [
|
||||
{id: 1, vector: [1, 2]},
|
||||
{id: 2, vector: [3, 4]},
|
||||
{id: 3, vector: [5, 6]},
|
||||
];
|
||||
const tbl = await con.createTable("my_table", data)
|
||||
await tbl.delete("id = 2")
|
||||
await tbl.countRows() // Returns 2
|
||||
```
|
||||
|
||||
If you have a list of values to delete, you can combine them into a
|
||||
stringified list and use the `IN` operator:
|
||||
|
||||
```ts
|
||||
const to_remove = [1, 5];
|
||||
await tbl.delete(`id IN (${to_remove.join(",")})`)
|
||||
await tbl.countRows() // Returns 1
|
||||
```
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[delete](../interfaces/Table.md#delete)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:377](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L377)
|
||||
|
||||
___
|
||||
|
||||
### indexStats
|
||||
|
||||
▸ **indexStats**(`indexUuid`): `Promise`\<[`IndexStats`](../interfaces/IndexStats.md)\>
|
||||
|
||||
Get statistics about an index.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type |
|
||||
| :------ | :------ |
|
||||
| `indexUuid` | `string` |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<[`IndexStats`](../interfaces/IndexStats.md)\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[indexStats](../interfaces/Table.md#indexstats)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:414](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L414)
|
||||
|
||||
___
|
||||
|
||||
### listIndices
|
||||
|
||||
▸ **listIndices**(): `Promise`\<[`VectorIndex`](../interfaces/VectorIndex.md)[]\>
|
||||
|
||||
List the indices on this table.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<[`VectorIndex`](../interfaces/VectorIndex.md)[]\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[listIndices](../interfaces/Table.md#listindices)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:403](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L403)
|
||||
|
||||
___
|
||||
|
||||
### overwrite
|
||||
|
||||
▸ **overwrite**(`data`): `Promise`\<`number`\>
|
||||
|
||||
Insert records into this Table, replacing its contents.
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<`number`\>
|
||||
|
||||
The number of rows added to the table
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[overwrite](../interfaces/Table.md#overwrite)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:300](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L300)
|
||||
|
||||
___
|
||||
|
||||
### search
|
||||
|
||||
▸ **search**(`query`): [`Query`](Query.md)\<`T`\>
|
||||
|
||||
Creates a search query to find the nearest neighbors of the given search term
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `query` | `T` | The query search term |
|
||||
|
||||
#### Returns
|
||||
|
||||
[`Query`](Query.md)\<`T`\>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[search](../interfaces/Table.md#search)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:269](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L269)
|
||||
|
||||
___
|
||||
|
||||
### update
|
||||
|
||||
▸ **update**(`args`): `Promise`\<`void`\>
|
||||
|
||||
Update rows in this table.
|
||||
|
||||
This can be used to update a single row, many rows, all rows, or
|
||||
sometimes no rows (if your predicate matches nothing).
|
||||
|
||||
#### Parameters
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `args` | [`UpdateArgs`](../interfaces/UpdateArgs.md) \| [`UpdateSqlArgs`](../interfaces/UpdateSqlArgs.md) | see [UpdateArgs](../interfaces/UpdateArgs.md) and [UpdateSqlArgs](../interfaces/UpdateSqlArgs.md) for more details |
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`\<`void`\>
|
||||
|
||||
**`Examples`**
|
||||
|
||||
```ts
|
||||
const con = await lancedb.connect("./.lancedb")
|
||||
const data = [
|
||||
{id: 1, vector: [3, 3], name: 'Ye'},
|
||||
{id: 2, vector: [4, 4], name: 'Mike'},
|
||||
];
|
||||
const tbl = await con.createTable("my_table", data)
|
||||
|
||||
await tbl.update({
|
||||
where: "id = 2",
|
||||
values: { vector: [2, 2], name: "Michael" },
|
||||
})
|
||||
|
||||
let results = await tbl.search([1, 1]).execute();
|
||||
// Returns [
|
||||
// {id: 2, vector: [2, 2], name: 'Michael'}
|
||||
// {id: 1, vector: [3, 3], name: 'Ye'}
|
||||
// ]
|
||||
```
|
||||
|
||||
#### Implementation of
|
||||
|
||||
[Table](../interfaces/Table.md).[update](../interfaces/Table.md#update)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[remote/index.ts:383](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/remote/index.ts#L383)
|
||||
@@ -22,7 +22,7 @@ Cosine distance
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:1041](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1041)
|
||||
[index.ts:1092](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1092)
|
||||
|
||||
___
|
||||
|
||||
@@ -34,7 +34,7 @@ Dot product
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:1046](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1046)
|
||||
[index.ts:1097](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1097)
|
||||
|
||||
___
|
||||
|
||||
@@ -46,4 +46,4 @@ Euclidean distance
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:1036](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1036)
|
||||
[index.ts:1087](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1087)
|
||||
|
||||
@@ -22,7 +22,7 @@ Append new data to the table.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:1007](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1007)
|
||||
[index.ts:1058](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1058)
|
||||
|
||||
___
|
||||
|
||||
@@ -34,7 +34,7 @@ Create a new [Table](../interfaces/Table.md).
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:1003](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1003)
|
||||
[index.ts:1054](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1054)
|
||||
|
||||
___
|
||||
|
||||
@@ -46,4 +46,4 @@ Overwrite the existing [Table](../interfaces/Table.md) if presented.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:1005](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1005)
|
||||
[index.ts:1056](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1056)
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:54](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L54)
|
||||
[index.ts:57](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L57)
|
||||
|
||||
___
|
||||
|
||||
@@ -28,7 +28,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:56](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L56)
|
||||
[index.ts:59](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L59)
|
||||
|
||||
___
|
||||
|
||||
@@ -38,4 +38,4 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:58](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L58)
|
||||
[index.ts:61](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L61)
|
||||
|
||||
@@ -19,7 +19,7 @@ The number of bytes removed from disk.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:878](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L878)
|
||||
[index.ts:904](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L904)
|
||||
|
||||
___
|
||||
|
||||
@@ -31,4 +31,4 @@ The number of old table versions removed.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:882](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L882)
|
||||
[index.ts:908](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L908)
|
||||
|
||||
@@ -22,7 +22,7 @@ fragments added.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:933](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L933)
|
||||
[index.ts:959](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L959)
|
||||
|
||||
___
|
||||
|
||||
@@ -35,7 +35,7 @@ file.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:928](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L928)
|
||||
[index.ts:954](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L954)
|
||||
|
||||
___
|
||||
|
||||
@@ -47,7 +47,7 @@ The number of new fragments that were created.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:923](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L923)
|
||||
[index.ts:949](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L949)
|
||||
|
||||
___
|
||||
|
||||
@@ -59,4 +59,4 @@ The number of fragments that were removed.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:919](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L919)
|
||||
[index.ts:945](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L945)
|
||||
|
||||
@@ -24,7 +24,7 @@ Default is true.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:901](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L901)
|
||||
[index.ts:927](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L927)
|
||||
|
||||
___
|
||||
|
||||
@@ -38,7 +38,7 @@ the deleted rows. Default is 10%.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:907](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L907)
|
||||
[index.ts:933](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L933)
|
||||
|
||||
___
|
||||
|
||||
@@ -50,7 +50,7 @@ The maximum number of rows per group. Defaults to 1024.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:895](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L895)
|
||||
[index.ts:921](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L921)
|
||||
|
||||
___
|
||||
|
||||
@@ -63,7 +63,7 @@ the number of cores on the machine.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:912](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L912)
|
||||
[index.ts:938](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L938)
|
||||
|
||||
___
|
||||
|
||||
@@ -77,4 +77,4 @@ Defaults to 1024 * 1024.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:891](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L891)
|
||||
[index.ts:917](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L917)
|
||||
|
||||
@@ -9,6 +9,7 @@ Connection could be local against filesystem or remote against a server.
|
||||
## Implemented by
|
||||
|
||||
- [`LocalConnection`](../classes/LocalConnection.md)
|
||||
- [`RemoteConnection`](../classes/RemoteConnection.md)
|
||||
|
||||
## Table of contents
|
||||
|
||||
@@ -31,7 +32,7 @@ Connection could be local against filesystem or remote against a server.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:183](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L183)
|
||||
[index.ts:188](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L188)
|
||||
|
||||
## Methods
|
||||
|
||||
@@ -59,7 +60,7 @@ Creates a new Table, optionally initializing it with new data.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:207](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L207)
|
||||
[index.ts:212](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L212)
|
||||
|
||||
▸ **createTable**(`name`, `data`): `Promise`\<[`Table`](Table.md)\<`number`[]\>\>
|
||||
|
||||
@@ -70,7 +71,7 @@ Creates a new Table and initialize it with new data.
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `data` | `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
|
||||
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -78,7 +79,7 @@ Creates a new Table and initialize it with new data.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:221](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L221)
|
||||
[index.ts:226](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L226)
|
||||
|
||||
▸ **createTable**(`name`, `data`, `options`): `Promise`\<[`Table`](Table.md)\<`number`[]\>\>
|
||||
|
||||
@@ -89,7 +90,7 @@ Creates a new Table and initialize it with new data.
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `data` | `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
|
||||
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
|
||||
| `options` | [`WriteOptions`](WriteOptions.md) | The write options to use when creating the table. |
|
||||
|
||||
#### Returns
|
||||
@@ -98,7 +99,7 @@ Creates a new Table and initialize it with new data.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:233](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L233)
|
||||
[index.ts:238](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L238)
|
||||
|
||||
▸ **createTable**\<`T`\>(`name`, `data`, `embeddings`): `Promise`\<[`Table`](Table.md)\<`T`\>\>
|
||||
|
||||
@@ -115,7 +116,7 @@ Creates a new Table and initialize it with new data.
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `data` | `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
|
||||
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
|
||||
| `embeddings` | [`EmbeddingFunction`](EmbeddingFunction.md)\<`T`\> | An embedding function to use on this table |
|
||||
|
||||
#### Returns
|
||||
@@ -124,7 +125,7 @@ Creates a new Table and initialize it with new data.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:246](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L246)
|
||||
[index.ts:251](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L251)
|
||||
|
||||
▸ **createTable**\<`T`\>(`name`, `data`, `embeddings`, `options`): `Promise`\<[`Table`](Table.md)\<`T`\>\>
|
||||
|
||||
@@ -141,7 +142,7 @@ Creates a new Table and initialize it with new data.
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `name` | `string` | The name of the table. |
|
||||
| `data` | `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
|
||||
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
|
||||
| `embeddings` | [`EmbeddingFunction`](EmbeddingFunction.md)\<`T`\> | An embedding function to use on this table |
|
||||
| `options` | [`WriteOptions`](WriteOptions.md) | The write options to use when creating the table. |
|
||||
|
||||
@@ -151,7 +152,7 @@ Creates a new Table and initialize it with new data.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:259](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L259)
|
||||
[index.ts:264](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L264)
|
||||
|
||||
___
|
||||
|
||||
@@ -173,7 +174,7 @@ Drop an existing table.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:270](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L270)
|
||||
[index.ts:275](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L275)
|
||||
|
||||
___
|
||||
|
||||
@@ -202,7 +203,7 @@ Open a table in the database.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:193](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L193)
|
||||
[index.ts:198](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L198)
|
||||
|
||||
___
|
||||
|
||||
@@ -216,4 +217,4 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:185](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L185)
|
||||
[index.ts:190](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L190)
|
||||
|
||||
@@ -19,9 +19,13 @@
|
||||
|
||||
• `Optional` **apiKey**: `string`
|
||||
|
||||
API key for the remote connections
|
||||
|
||||
Can also be passed by setting environment variable `LANCEDB_API_KEY`
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:81](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L81)
|
||||
[index.ts:88](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L88)
|
||||
|
||||
___
|
||||
|
||||
@@ -35,7 +39,7 @@ If not provided, LanceDB will use the default credentials provider chain.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:75](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L75)
|
||||
[index.ts:78](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L78)
|
||||
|
||||
___
|
||||
|
||||
@@ -47,7 +51,7 @@ AWS region to connect to. Default is defaultAwsRegion.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:78](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L78)
|
||||
[index.ts:81](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L81)
|
||||
|
||||
___
|
||||
|
||||
@@ -55,13 +59,13 @@ ___
|
||||
|
||||
• `Optional` **hostOverride**: `string`
|
||||
|
||||
Override the host URL for the remote connections.
|
||||
Override the host URL for the remote connection.
|
||||
|
||||
This is useful for local testing.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:91](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L91)
|
||||
[index.ts:98](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L98)
|
||||
|
||||
___
|
||||
|
||||
@@ -73,7 +77,7 @@ Region to connect
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:84](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L84)
|
||||
[index.ts:91](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L91)
|
||||
|
||||
___
|
||||
|
||||
@@ -85,8 +89,8 @@ LanceDB database URI.
|
||||
|
||||
- `/path/to/database` - local database
|
||||
- `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
||||
- `db://host:port` - remote database (SaaS)
|
||||
- `db://host:port` - remote database (LanceDB cloud)
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:69](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L69)
|
||||
[index.ts:72](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L72)
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:116](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L116)
|
||||
[index.ts:121](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L121)
|
||||
|
||||
___
|
||||
|
||||
@@ -36,7 +36,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:122](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L122)
|
||||
[index.ts:127](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L127)
|
||||
|
||||
___
|
||||
|
||||
@@ -46,7 +46,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:113](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L113)
|
||||
[index.ts:118](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L118)
|
||||
|
||||
___
|
||||
|
||||
@@ -56,7 +56,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:119](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L119)
|
||||
[index.ts:124](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L124)
|
||||
|
||||
___
|
||||
|
||||
@@ -66,4 +66,4 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:125](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L125)
|
||||
[index.ts:130](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L130)
|
||||
|
||||
@@ -45,7 +45,7 @@ Creates a vector representation for the given values.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/embedding_function.ts:27](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/embedding_function.ts#L27)
|
||||
[embedding/embedding_function.ts:27](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/embedding/embedding_function.ts#L27)
|
||||
|
||||
___
|
||||
|
||||
@@ -57,4 +57,4 @@ The name of the column that will be used as input for the Embedding Function.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[embedding/embedding_function.ts:22](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/embedding_function.ts#L22)
|
||||
[embedding/embedding_function.ts:22](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/embedding/embedding_function.ts#L22)
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:478](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L478)
|
||||
[index.ts:485](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L485)
|
||||
|
||||
___
|
||||
|
||||
@@ -27,4 +27,4 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:479](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L479)
|
||||
[index.ts:486](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L486)
|
||||
|
||||
@@ -29,7 +29,7 @@ The column to be indexed
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:942](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L942)
|
||||
[index.ts:968](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L968)
|
||||
|
||||
___
|
||||
|
||||
@@ -41,7 +41,7 @@ Cache size of the index
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:991](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L991)
|
||||
[index.ts:1042](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1042)
|
||||
|
||||
___
|
||||
|
||||
@@ -49,11 +49,11 @@ ___
|
||||
|
||||
• `Optional` **index\_name**: `string`
|
||||
|
||||
A unique name for the index
|
||||
Note: this parameter is not supported on LanceDB Cloud
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:947](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L947)
|
||||
[index.ts:976](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L976)
|
||||
|
||||
___
|
||||
|
||||
@@ -61,11 +61,11 @@ ___
|
||||
|
||||
• `Optional` **max\_iters**: `number`
|
||||
|
||||
The max number of iterations for kmeans training.
|
||||
Note: this parameter is not yet supported on LanceDB Cloud
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:962](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L962)
|
||||
[index.ts:997](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L997)
|
||||
|
||||
___
|
||||
|
||||
@@ -73,11 +73,11 @@ ___
|
||||
|
||||
• `Optional` **max\_opq\_iters**: `number`
|
||||
|
||||
Max number of iterations to train OPQ, if `use_opq` is true.
|
||||
Note: this parameter is not yet supported on LanceDB Cloud
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:981](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L981)
|
||||
[index.ts:1029](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1029)
|
||||
|
||||
___
|
||||
|
||||
@@ -89,7 +89,7 @@ Metric type, L2 or Cosine
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:952](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L952)
|
||||
[index.ts:981](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L981)
|
||||
|
||||
___
|
||||
|
||||
@@ -97,11 +97,11 @@ ___
|
||||
|
||||
• `Optional` **num\_bits**: `number`
|
||||
|
||||
The number of bits to present one PQ centroid.
|
||||
Note: this parameter is not yet supported on LanceDB Cloud
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:976](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L976)
|
||||
[index.ts:1021](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1021)
|
||||
|
||||
___
|
||||
|
||||
@@ -109,11 +109,11 @@ ___
|
||||
|
||||
• `Optional` **num\_partitions**: `number`
|
||||
|
||||
The number of partitions this index
|
||||
Note: this parameter is not yet supported on LanceDB Cloud
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:957](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L957)
|
||||
[index.ts:989](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L989)
|
||||
|
||||
___
|
||||
|
||||
@@ -121,11 +121,11 @@ ___
|
||||
|
||||
• `Optional` **num\_sub\_vectors**: `number`
|
||||
|
||||
Number of subvectors to build PQ code
|
||||
Note: this parameter is not yet supported on LanceDB Cloud
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:972](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L972)
|
||||
[index.ts:1013](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1013)
|
||||
|
||||
___
|
||||
|
||||
@@ -133,11 +133,11 @@ ___
|
||||
|
||||
• `Optional` **replace**: `boolean`
|
||||
|
||||
Replace an existing index with the same name if it exists.
|
||||
Note: this parameter is not yet supported on LanceDB Cloud
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:986](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L986)
|
||||
[index.ts:1037](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1037)
|
||||
|
||||
___
|
||||
|
||||
@@ -147,7 +147,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:993](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L993)
|
||||
[index.ts:1044](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1044)
|
||||
|
||||
___
|
||||
|
||||
@@ -155,8 +155,8 @@ ___
|
||||
|
||||
• `Optional` **use\_opq**: `boolean`
|
||||
|
||||
Train as optimized product quantization.
|
||||
Note: this parameter is not yet supported on LanceDB Cloud
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:967](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L967)
|
||||
[index.ts:1005](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1005)
|
||||
|
||||
@@ -13,6 +13,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
|
||||
## Implemented by
|
||||
|
||||
- [`LocalTable`](../classes/LocalTable.md)
|
||||
- [`RemoteTable`](../classes/RemoteTable.md)
|
||||
|
||||
## Table of contents
|
||||
|
||||
@@ -35,7 +36,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
|
||||
|
||||
### add
|
||||
|
||||
• **add**: (`data`: `Record`\<`string`, `unknown`\>[]) => `Promise`\<`number`\>
|
||||
• **add**: (`data`: `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[]) => `Promise`\<`number`\>
|
||||
|
||||
#### Type declaration
|
||||
|
||||
@@ -47,7 +48,7 @@ Insert records into this Table.
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
##### Returns
|
||||
|
||||
@@ -57,7 +58,7 @@ The number of rows added to the table
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:291](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L291)
|
||||
[index.ts:296](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L296)
|
||||
|
||||
___
|
||||
|
||||
@@ -77,7 +78,7 @@ Returns the number of rows in this table.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:361](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L361)
|
||||
[index.ts:368](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L368)
|
||||
|
||||
___
|
||||
|
||||
@@ -107,7 +108,7 @@ VectorIndexParams.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:306](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L306)
|
||||
[index.ts:313](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L313)
|
||||
|
||||
___
|
||||
|
||||
@@ -142,7 +143,7 @@ await table.createScalarIndex('my_col')
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:356](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L356)
|
||||
[index.ts:363](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L363)
|
||||
|
||||
___
|
||||
|
||||
@@ -194,7 +195,7 @@ await tbl.countRows() // Returns 1
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:395](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L395)
|
||||
[index.ts:402](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L402)
|
||||
|
||||
___
|
||||
|
||||
@@ -220,7 +221,7 @@ Get statistics about an index.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:438](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L438)
|
||||
[index.ts:445](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L445)
|
||||
|
||||
___
|
||||
|
||||
@@ -240,7 +241,7 @@ List the indices on this table.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:433](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L433)
|
||||
[index.ts:440](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L440)
|
||||
|
||||
___
|
||||
|
||||
@@ -250,13 +251,13 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:277](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L277)
|
||||
[index.ts:282](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L282)
|
||||
|
||||
___
|
||||
|
||||
### overwrite
|
||||
|
||||
• **overwrite**: (`data`: `Record`\<`string`, `unknown`\>[]) => `Promise`\<`number`\>
|
||||
• **overwrite**: (`data`: `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[]) => `Promise`\<`number`\>
|
||||
|
||||
#### Type declaration
|
||||
|
||||
@@ -268,7 +269,7 @@ Insert records into this Table, replacing its contents.
|
||||
|
||||
| Name | Type | Description |
|
||||
| :------ | :------ | :------ |
|
||||
| `data` | `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
|
||||
|
||||
##### Returns
|
||||
|
||||
@@ -278,7 +279,7 @@ The number of rows added to the table
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:299](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L299)
|
||||
[index.ts:304](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L304)
|
||||
|
||||
___
|
||||
|
||||
@@ -288,7 +289,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:440](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L440)
|
||||
[index.ts:447](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L447)
|
||||
|
||||
___
|
||||
|
||||
@@ -314,7 +315,7 @@ Creates a search query to find the nearest neighbors of the given search term
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:283](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L283)
|
||||
[index.ts:288](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L288)
|
||||
|
||||
___
|
||||
|
||||
@@ -365,4 +366,4 @@ let results = await tbl.search([1, 1]).execute();
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:428](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L428)
|
||||
[index.ts:435](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L435)
|
||||
|
||||
@@ -20,7 +20,7 @@ new values to set
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:454](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L454)
|
||||
[index.ts:461](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L461)
|
||||
|
||||
___
|
||||
|
||||
@@ -33,4 +33,4 @@ in which case all rows will be updated.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:448](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L448)
|
||||
[index.ts:455](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L455)
|
||||
|
||||
@@ -20,7 +20,7 @@ new values to set as SQL expressions.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:468](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L468)
|
||||
[index.ts:475](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L475)
|
||||
|
||||
___
|
||||
|
||||
@@ -33,4 +33,4 @@ in which case all rows will be updated.
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:462](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L462)
|
||||
[index.ts:469](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L469)
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:472](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L472)
|
||||
[index.ts:479](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L479)
|
||||
|
||||
___
|
||||
|
||||
@@ -28,7 +28,7 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:473](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L473)
|
||||
[index.ts:480](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L480)
|
||||
|
||||
___
|
||||
|
||||
@@ -38,4 +38,4 @@ ___
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:474](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L474)
|
||||
[index.ts:481](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L481)
|
||||
|
||||
@@ -24,4 +24,4 @@ A [WriteMode](../enums/WriteMode.md) to use on this operation
|
||||
|
||||
#### Defined in
|
||||
|
||||
[index.ts:1015](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1015)
|
||||
[index.ts:1066](https://github.com/lancedb/lancedb/blob/5228ca4/node/src/index.ts#L1066)
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
# Python API Reference (SaaS)
|
||||
|
||||
This section contains the API reference for the SaaS Python API.
|
||||
|
||||
## Installation
|
||||
|
||||
```shell
|
||||
pip install lancedb
|
||||
```
|
||||
|
||||
## Connection
|
||||
|
||||
::: lancedb.connect
|
||||
|
||||
::: lancedb.remote.db.RemoteDBConnection
|
||||
|
||||
## Table
|
||||
|
||||
::: lancedb.remote.table.RemoteTable
|
||||
|
||||
@@ -9,6 +9,7 @@ const excludedGlobs = [
|
||||
"../src/embedding.md",
|
||||
"../src/examples/*.md",
|
||||
"../src/guides/tables.md",
|
||||
"../src/guides/storage.md",
|
||||
"../src/embeddings/*.md",
|
||||
];
|
||||
|
||||
|
||||
74
node/package-lock.json
generated
74
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.4.3",
|
||||
"version": "0.4.4",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.4.3",
|
||||
"version": "0.4.4",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -53,11 +53,11 @@
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.4.3",
|
||||
"@lancedb/vectordb-darwin-x64": "0.4.3",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.4.3",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.4.3",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.4.3"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.4.4",
|
||||
"@lancedb/vectordb-darwin-x64": "0.4.4",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.4.4",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.4.4",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.4.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@75lb/deep-merge": {
|
||||
@@ -328,66 +328,6 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.4.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.3.tgz",
|
||||
"integrity": "sha512-47CvvSaV1EdUsFEpXUJApTk+hMzAhCxVizipCFUlXCgcmzpCDL86wNgJij/X9a+j6zADhIX//Lsu0qd/an/Bpw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.4.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.3.tgz",
|
||||
"integrity": "sha512-UlZZv8CmJIuRJNJG+Y1VmFsGyPR8W/72Q5EwgMMsSES6zpMQ9pNdBDWhL3UGX6nMRgnbprkwYiWJ3xHhJvtqtw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.4.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.3.tgz",
|
||||
"integrity": "sha512-L6NVJr/lKEd8+904FzZNpT8BGQMs2cHNYbGJMIaVvGnMiIJgKAFKtOyGtdDjoe1xRZoEw21yjRGksGbnRO5wHQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.4.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.3.tgz",
|
||||
"integrity": "sha512-OBx3WF3pK0xNfFJeErmuD9R2QWLa3XdeZspyTsIrQmBDeKj3HKh8y7Scpx4NH5Y09+9JNqRRKRZN7OqWTYhITg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.4.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.3.tgz",
|
||||
"integrity": "sha512-n9IvR81NXZKnSN91mrgeXbEyCiGM+YLJpOgbdHoEtMP04VDnS+iSU4jGOtQBKErvWeCJQaGFQ9qzdcVchpRGyw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@neon-rs/cli": {
|
||||
"version": "0.0.160",
|
||||
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.4.3",
|
||||
"version": "0.4.4",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -81,10 +81,10 @@
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.4.3",
|
||||
"@lancedb/vectordb-darwin-x64": "0.4.3",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.4.3",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.4.3",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.4.3"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.4.4",
|
||||
"@lancedb/vectordb-darwin-x64": "0.4.4",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.4.4",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.4.4",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.4.4"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Copyright 2023 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
@@ -49,6 +49,7 @@ export { Query }
|
||||
export type { EmbeddingFunction }
|
||||
export { OpenAIEmbeddingFunction } from './embedding/openai'
|
||||
export { makeArrowTable, type MakeArrowTableOptions } from './arrow'
|
||||
export { RemoteConnection, RemoteTable } from './remote'
|
||||
|
||||
const defaultAwsRegion = 'us-west-2'
|
||||
|
||||
@@ -725,6 +726,7 @@ export class LocalTable<T = number[]> implements Table<T> {
|
||||
* Insert records into this Table, replacing its contents.
|
||||
*
|
||||
* @param data Records to be inserted into the Table
|
||||
* (the `Table` type here refers to an Arrow Table)
|
||||
* @return The number of rows added to the table
|
||||
*/
|
||||
async overwrite (
|
||||
@@ -815,6 +817,7 @@ export class LocalTable<T = number[]> implements Table<T> {
|
||||
/**
|
||||
* Clean up old versions of the table, freeing disk space.
|
||||
*
|
||||
* Note: this API is not yet available on LanceDB Cloud
|
||||
* @param olderThan The minimum age in minutes of the versions to delete. If not
|
||||
* provided, defaults to two weeks.
|
||||
* @param deleteUnverified Because they may be part of an in-progress
|
||||
@@ -845,6 +848,7 @@ export class LocalTable<T = number[]> implements Table<T> {
|
||||
* This can be run after making several small appends to optimize the table
|
||||
* for faster reads.
|
||||
*
|
||||
* Note: this API is not yet available on LanceDB Cloud
|
||||
* @param options Advanced options configuring compaction. In most cases, you
|
||||
* can omit this argument, as the default options are sensible
|
||||
* for most tables.
|
||||
@@ -966,6 +970,9 @@ export interface IvfPQIndexConfig {
|
||||
/**
|
||||
* A unique name for the index
|
||||
*/
|
||||
/**
|
||||
* Note: this parameter is not supported on LanceDB Cloud
|
||||
*/
|
||||
index_name?: string
|
||||
|
||||
/**
|
||||
@@ -976,35 +983,57 @@ export interface IvfPQIndexConfig {
|
||||
/**
|
||||
* The number of partitions in this index
|
||||
*/
|
||||
/**
|
||||
* Note: this parameter is not yet supported on LanceDB Cloud
|
||||
*/
|
||||
num_partitions?: number
|
||||
|
||||
/**
|
||||
* The max number of iterations for kmeans training.
|
||||
*/
|
||||
/**
|
||||
* Note: this parameter is not yet supported on LanceDB Cloud
|
||||
*/
|
||||
max_iters?: number
|
||||
|
||||
/**
|
||||
* Train as optimized product quantization.
|
||||
*/
|
||||
/**
|
||||
* Note: this parameter is not yet supported on LanceDB Cloud
|
||||
*/
|
||||
use_opq?: boolean
|
||||
|
||||
/**
|
||||
* Number of subvectors to build PQ code
|
||||
*/
|
||||
/**
|
||||
* Note: this parameter is not yet supported on LanceDB Cloud
|
||||
*/
|
||||
num_sub_vectors?: number
|
||||
|
||||
/**
|
||||
* The number of bits to represent one PQ centroid.
|
||||
*/
|
||||
/**
|
||||
* Note: this parameter is not yet supported on LanceDB Cloud
|
||||
*/
|
||||
num_bits?: number
|
||||
|
||||
/**
|
||||
* Max number of iterations to train OPQ, if `use_opq` is true.
|
||||
*/
|
||||
/**
|
||||
* Note: this parameter is not yet supported on LanceDB Cloud
|
||||
*/
|
||||
max_opq_iters?: number
|
||||
|
||||
/**
|
||||
* Replace an existing index with the same name if it exists.
|
||||
*/
|
||||
/**
|
||||
* Note: this parameter is not yet supported on LanceDB Cloud
|
||||
*/
|
||||
replace?: boolean
|
||||
|
||||
/**
|
||||
|
||||
22
nodejs/.eslintrc.js
Normal file
22
nodejs/.eslintrc.js
Normal file
@@ -0,0 +1,22 @@
|
||||
module.exports = {
|
||||
env: {
|
||||
browser: true,
|
||||
es2021: true,
|
||||
},
|
||||
extends: [
|
||||
"eslint:recommended",
|
||||
"plugin:@typescript-eslint/recommended-type-checked",
|
||||
"plugin:@typescript-eslint/stylistic-type-checked",
|
||||
],
|
||||
overrides: [],
|
||||
parserOptions: {
|
||||
project: "./tsconfig.json",
|
||||
ecmaVersion: "latest",
|
||||
sourceType: "module",
|
||||
},
|
||||
rules: {
|
||||
"@typescript-eslint/method-signature-style": "off",
|
||||
"@typescript-eslint/no-explicit-any": "off",
|
||||
},
|
||||
ignorePatterns: ["node_modules/", "dist/", "build/", "vectordb/native.*"],
|
||||
};
|
||||
15
nodejs/.npmignore
Normal file
15
nodejs/.npmignore
Normal file
@@ -0,0 +1,15 @@
|
||||
target
|
||||
Cargo.lock
|
||||
.cargo
|
||||
.github
|
||||
npm
|
||||
.eslintrc
|
||||
.prettierignore
|
||||
rustfmt.toml
|
||||
yarn.lock
|
||||
*.node
|
||||
.yarn
|
||||
__test__
|
||||
renovate.json
|
||||
.idea
|
||||
src
|
||||
27
nodejs/Cargo.toml
Normal file
27
nodejs/Cargo.toml
Normal file
@@ -0,0 +1,27 @@
|
||||
[package]
|
||||
name = "vectordb-nodejs"
|
||||
edition = "2021"
|
||||
version = "0.0.0"
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
arrow-ipc.workspace = true
|
||||
napi = { version = "2.14", default-features = false, features = [
|
||||
"napi7",
|
||||
"async"
|
||||
] }
|
||||
napi-derive = "2.14"
|
||||
vectordb = { path = "../rust/vectordb" }
|
||||
lance.workspace = true
|
||||
lance-linalg.workspace = true
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = "2.1"
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
strip = "symbols"
|
||||
24
nodejs/README.md
Normal file
24
nodejs/README.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# (New) LanceDB NodeJS SDK
|
||||
|
||||
It will replace the NodeJS SDK when it is ready.
|
||||
|
||||
|
||||
## Development
|
||||
|
||||
```sh
|
||||
npm run build
|
||||
npm t
|
||||
```
|
||||
|
||||
Generating docs
|
||||
|
||||
```
|
||||
npm run docs
|
||||
|
||||
cd ../docs
|
||||
# Assume the virtual environment was created
|
||||
# python3 -m venv venv
|
||||
# pip install -r requirements.txt
|
||||
. ./venv/bin/activate
|
||||
mkdocs build
|
||||
```
|
||||
106
nodejs/__test__/arrow.test.ts
Normal file
106
nodejs/__test__/arrow.test.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import { makeArrowTable, toBuffer } from "../vectordb/arrow";
|
||||
import {
|
||||
Field,
|
||||
FixedSizeList,
|
||||
Float16,
|
||||
Float32,
|
||||
Int32,
|
||||
tableFromIPC,
|
||||
Schema,
|
||||
Float64,
|
||||
} from "apache-arrow";
|
||||
|
||||
// A caller-supplied schema must be honoured by makeArrowTable and must
// survive a round trip through the Arrow IPC buffer.
test("customized schema", () => {
  const vectorType = new FixedSizeList(3, new Field("item", new Float16()));
  const expectedSchema = new Schema([
    new Field("a", new Int32(), true),
    new Field("b", new Float32(), true),
    new Field("c", vectorType, true),
  ]);
  const rows = [
    { a: 1, b: 2, c: [1, 2, 3] },
    { a: 4, b: 5, c: [4, 5, 6] },
    { a: 7, b: 8, c: [7, 8, 9] },
  ];

  const table = makeArrowTable(rows, { schema: expectedSchema });
  expect(table.schema.toString()).toEqual(expectedSchema.toString());

  // Serialize, then read the bytes back with Arrow's own IPC reader.
  const ipcBytes = toBuffer(table);
  expect(ipcBytes.byteLength).toBeGreaterThan(0);

  const roundTripped = tableFromIPC(ipcBytes);
  expect(roundTripped.numRows).toBe(3);
  expect(roundTripped.schema.toString()).toStrictEqual(
    expectedSchema.toString()
  );
});
|
||||
|
||||
// Without explicit options, a column named "vector" is expected to be encoded
// as a fixed-size list of Float32 while plain numbers become Float64.
test("default vector column", function () {
  const schema = new Schema([
    new Field("a", new Float64(), true),
    new Field("b", new Float64(), true),
    new Field("vector", new FixedSizeList(3, new Field("item", new Float32()))),
  ]);
  const table = makeArrowTable([
    { a: 1, b: 2, vector: [1, 2, 3] },
    { a: 4, b: 5, vector: [4, 5, 6] },
    { a: 7, b: 8, vector: [7, 8, 9] },
  ]);

  const buf = toBuffer(table);
  expect(buf.byteLength).toBeGreaterThan(0);

  const actual = tableFromIPC(buf);
  expect(actual.numRows).toBe(3);
  const actualSchema = actual.schema;
  // Compare against the expected schema; the previous assertion compared
  // `actualSchema` with itself and therefore could never fail.
  expect(actualSchema.toString()).toEqual(schema.toString());
});
|
||||
|
||||
// Two columns are declared as Float16 vector columns through `vectorColumns`;
// the schema read back from the IPC buffer must match the expected one.
test("2 vector columns", () => {
  const float16Vector = () =>
    new FixedSizeList(3, new Field("item", new Float16()));
  const expectedSchema = new Schema([
    new Field("a", new Float64()),
    new Field("b", new Float64()),
    new Field("vec1", float16Vector()),
    new Field("vec2", float16Vector()),
  ]);

  const rows = [
    { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
    { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
    { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] },
  ];
  const options = {
    vectorColumns: {
      vec1: { type: new Float16() },
      vec2: { type: new Float16() },
    },
  };
  const table = makeArrowTable(rows, options);

  const ipcBytes = toBuffer(table);
  expect(ipcBytes.byteLength).toBeGreaterThan(0);

  const roundTripped = tableFromIPC(ipcBytes);
  expect(roundTripped.numRows).toBe(3);
  expect(roundTripped.schema.toString()).toEqual(expectedSchema.toString());
});
|
||||
34
nodejs/__test__/index.test.ts
Normal file
34
nodejs/__test__/index.test.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import * as os from "os";
|
||||
import * as path from "path";
|
||||
import * as fs from "fs";
|
||||
|
||||
import { Schema, Field, Float64 } from "apache-arrow";
|
||||
import { connect } from "../dist/index.js";
|
||||
|
||||
// Connects to a fresh temporary directory, checks it starts empty, creates a
// table from plain records and verifies the listing and the inferred schema.
test("open database", async () => {
  const dbDir = fs.mkdtempSync(path.join(os.tmpdir(), "test-open"));
  const db = await connect(dbDir);

  // A brand-new database directory contains no tables.
  const initialNames = await db.tableNames();
  expect(initialNames).toStrictEqual([]);

  const tbl = await db.createTable("test", [{ id: 1 }, { id: 2 }]);
  const namesAfterCreate = await db.tableNames();
  expect(namesAfterCreate).toStrictEqual(["test"]);

  const expectedSchema = new Schema([new Field("id", new Float64(), true)]);
  expect(tbl.schema).toEqual(expectedSchema);
});
|
||||
99
nodejs/__test__/table.test.ts
Normal file
99
nodejs/__test__/table.test.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import * as os from "os";
|
||||
import * as path from "path";
|
||||
import * as fs from "fs";
|
||||
|
||||
import { connect } from "../dist";
|
||||
import { Schema, Field, Float32, Int32, FixedSizeList } from "apache-arrow";
|
||||
import { makeArrowTable } from "../dist/arrow";
|
||||
|
||||
// Index-creation tests. Every test runs against its own temporary database
// directory and inspects the `_indices` folder written next to the table data.
describe("Test creating index", () => {
  let tmpDir: string;
  const schema = new Schema([
    new Field("id", new Int32(), true),
    new Field("vec", new FixedSizeList(32, new Field("item", new Float32()))),
  ]);

  // Builds 300 rows of `{ id, vec }`, where `vec` holds 32 random numbers,
  // shaped according to `schema`.
  const makeRandomTable = () =>
    makeArrowTable(
      Array.from({ length: 300 }, (_, id) => ({
        id,
        vec: Array.from({ length: 32 }, () => Math.random()),
      })),
      { schema }
    );

  // Lists the entries of the index directory of table `name`.
  const listIndices = (name: string) =>
    fs.readdirSync(path.join(tmpDir, `${name}.lance`, "_indices"));

  beforeEach(() => {
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "index-"));
  });

  test("create vector index with no column", async () => {
    const db = await connect(tmpDir);
    const tbl = await db.createTable("test", makeRandomTable());
    await tbl.createIndex().build();

    // check index directory
    expect(listIndices("test")).toHaveLength(1);
    // TODO: check index type.
  });

  test("no vector column available", async () => {
    const db = await connect(tmpDir);
    const plainRows = makeArrowTable([
      { id: 1, val: 2 },
      { id: 2, val: 3 },
    ]);
    const tbl = await db.createTable("no_vec", plainRows);

    // With no column given and no vector column present, building must fail.
    await expect(tbl.createIndex().build()).rejects.toThrow(
      "No vector column found"
    );

    // Naming a column explicitly succeeds and leaves exactly one index.
    await tbl.createIndex("val").build();
    expect(listIndices("no_vec")).toHaveLength(1);
  });

  test("create scalar index", async () => {
    const db = await connect(tmpDir);
    const tbl = await db.createTable("test", makeRandomTable());
    await tbl.createIndex("id").build();

    // check index directory
    expect(listIndices("test")).toHaveLength(1);
    // TODO: check index type.
  });
});
|
||||
5
nodejs/build.rs
Normal file
5
nodejs/build.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
extern crate napi_build;
|
||||
|
||||
fn main() {
|
||||
napi_build::setup();
|
||||
}
|
||||
5
nodejs/jest.config.js
Normal file
5
nodejs/jest.config.js
Normal file
@@ -0,0 +1,5 @@
|
||||
/** @type {import('ts-jest').JestConfigWithTsJest} */
|
||||
module.exports = {
|
||||
preset: 'ts-jest',
|
||||
testEnvironment: 'node',
|
||||
};
|
||||
3
nodejs/npm/darwin-arm64/README.md
Normal file
3
nodejs/npm/darwin-arm64/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# `vectordb-darwin-arm64`
|
||||
|
||||
This is the **aarch64-apple-darwin** binary for `vectordb`
|
||||
18
nodejs/npm/darwin-arm64/package.json
Normal file
18
nodejs/npm/darwin-arm64/package.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "vectordb-darwin-arm64",
|
||||
"version": "0.4.3",
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"main": "vectordb.darwin-arm64.node",
|
||||
"files": [
|
||||
"vectordb.darwin-arm64.node"
|
||||
],
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
}
|
||||
3
nodejs/npm/darwin-x64/README.md
Normal file
3
nodejs/npm/darwin-x64/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# `vectordb-darwin-x64`
|
||||
|
||||
This is the **x86_64-apple-darwin** binary for `vectordb`
|
||||
18
nodejs/npm/darwin-x64/package.json
Normal file
18
nodejs/npm/darwin-x64/package.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "vectordb-darwin-x64",
|
||||
"version": "0.4.3",
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"main": "vectordb.darwin-x64.node",
|
||||
"files": [
|
||||
"vectordb.darwin-x64.node"
|
||||
],
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
}
|
||||
3
nodejs/npm/linux-arm64-gnu/README.md
Normal file
3
nodejs/npm/linux-arm64-gnu/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# `vectordb-linux-arm64-gnu`
|
||||
|
||||
This is the **aarch64-unknown-linux-gnu** binary for `vectordb`
|
||||
21
nodejs/npm/linux-arm64-gnu/package.json
Normal file
21
nodejs/npm/linux-arm64-gnu/package.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "vectordb-linux-arm64-gnu",
|
||||
"version": "0.4.3",
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"main": "vectordb.linux-arm64-gnu.node",
|
||||
"files": [
|
||||
"vectordb.linux-arm64-gnu.node"
|
||||
],
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
},
|
||||
"libc": [
|
||||
"glibc"
|
||||
]
|
||||
}
|
||||
3
nodejs/npm/linux-x64-gnu/README.md
Normal file
3
nodejs/npm/linux-x64-gnu/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# `vectordb-linux-x64-gnu`
|
||||
|
||||
This is the **x86_64-unknown-linux-gnu** binary for `vectordb`
|
||||
21
nodejs/npm/linux-x64-gnu/package.json
Normal file
21
nodejs/npm/linux-x64-gnu/package.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "vectordb-linux-x64-gnu",
|
||||
"version": "0.4.3",
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"main": "vectordb.linux-x64-gnu.node",
|
||||
"files": [
|
||||
"vectordb.linux-x64-gnu.node"
|
||||
],
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
},
|
||||
"libc": [
|
||||
"glibc"
|
||||
]
|
||||
}
|
||||
6300
nodejs/package-lock.json
generated
Normal file
6300
nodejs/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
67
nodejs/package.json
Normal file
67
nodejs/package.json
Normal file
@@ -0,0 +1,67 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.4.3",
|
||||
"main": "./dist/index.js",
|
||||
"types": "./dist/index.d.ts",
|
||||
"napi": {
|
||||
"name": "vectordb-nodejs",
|
||||
"triples": {
|
||||
"defaults": false,
|
||||
"additional": [
|
||||
"aarch64-apple-darwin",
|
||||
"aarch64-unknown-linux-gnu",
|
||||
"x86_64-apple-darwin",
|
||||
"x86_64-unknown-linux-gnu"
|
||||
]
|
||||
}
|
||||
},
|
||||
"license": "Apache-2.0",
|
||||
"devDependencies": {
|
||||
"@napi-rs/cli": "^2.18.0",
|
||||
"@types/jest": "^29.5.11",
|
||||
"@typescript-eslint/eslint-plugin": "^6.19.0",
|
||||
"@typescript-eslint/parser": "^6.19.0",
|
||||
"eslint": "^8.56.0",
|
||||
"jest": "^29.7.0",
|
||||
"ts-jest": "^29.1.2",
|
||||
"typedoc": "^0.25.7",
|
||||
"typedoc-plugin-markdown": "^3.17.1",
|
||||
"typescript": "^5.3.3"
|
||||
},
|
||||
"ava": {
|
||||
"timeout": "3m"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
],
|
||||
"os": [
|
||||
"darwin",
|
||||
"linux",
|
||||
"windows"
|
||||
],
|
||||
"scripts": {
|
||||
"artifacts": "napi artifacts",
|
||||
"build:native": "napi build --platform --release --js vectordb/native.js --dts vectordb/native.d.ts dist/",
|
||||
"build:debug": "napi build --platform --dts ../vectordb/native.d.ts --js ../vectordb/native.js dist/",
|
||||
"build": "npm run build:debug && tsc -b",
|
||||
"docs": "typedoc --plugin typedoc-plugin-markdown vectordb/index.ts",
|
||||
"lint": "eslint vectordb --ext .js,.ts",
|
||||
"prepublishOnly": "napi prepublish -t npm",
|
||||
"test": "npm run build && jest",
|
||||
"universal": "napi universal",
|
||||
"version": "napi version"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"vectordb-darwin-arm64": "0.4.3",
|
||||
"vectordb-darwin-x64": "0.4.3",
|
||||
"vectordb-linux-arm64-gnu": "0.4.3",
|
||||
"vectordb-linux-x64-gnu": "0.4.3"
|
||||
},
|
||||
"dependencies": {
|
||||
"apache-arrow": "^15.0.0"
|
||||
}
|
||||
}
|
||||
86
nodejs/src/connection.rs
Normal file
86
nodejs/src/connection.rs
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::*;
|
||||
|
||||
use crate::table::Table;
|
||||
use vectordb::connection::{Connection as LanceDBConnection, Database};
|
||||
use vectordb::ipc::ipc_file_to_batches;
|
||||
|
||||
#[napi]
|
||||
pub struct Connection {
|
||||
conn: Arc<dyn LanceDBConnection>,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
impl Connection {
|
||||
/// Create a new Connection instance from the given URI.
|
||||
#[napi(factory)]
|
||||
pub async fn new(uri: String) -> napi::Result<Self> {
|
||||
Ok(Self {
|
||||
conn: Arc::new(Database::connect(&uri).await.map_err(|e| {
|
||||
napi::Error::from_reason(format!("Failed to connect to database: {}", e))
|
||||
})?),
|
||||
})
|
||||
}
|
||||
|
||||
/// List all tables in the dataset.
|
||||
#[napi]
|
||||
pub async fn table_names(&self) -> napi::Result<Vec<String>> {
|
||||
self.conn
|
||||
.table_names()
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("{}", e)))
|
||||
}
|
||||
|
||||
/// Create table from a Apache Arrow IPC (file) buffer.
|
||||
///
|
||||
/// Parameters:
|
||||
/// - name: The name of the table.
|
||||
/// - buf: The buffer containing the IPC file.
|
||||
///
|
||||
#[napi]
|
||||
pub async fn create_table(&self, name: String, buf: Buffer) -> napi::Result<Table> {
|
||||
let batches = ipc_file_to_batches(buf.to_vec())
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
|
||||
let tbl = self
|
||||
.conn
|
||||
.create_table(&name, Box::new(batches), None)
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("{}", e)))?;
|
||||
Ok(Table::new(tbl))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn open_table(&self, name: String) -> napi::Result<Table> {
|
||||
let tbl = self
|
||||
.conn
|
||||
.open_table(&name)
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("{}", e)))?;
|
||||
Ok(Table::new(tbl))
|
||||
}
|
||||
|
||||
/// Drop table with the name. Or raise an error if the table does not exist.
|
||||
#[napi]
|
||||
pub async fn drop_table(&self, name: String) -> napi::Result<()> {
|
||||
self.conn
|
||||
.drop_table(&name)
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
101
nodejs/src/index.rs
Normal file
101
nodejs/src/index.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use lance_linalg::distance::MetricType as LanceMetricType;
|
||||
use napi_derive::napi;
|
||||
|
||||
#[napi]
|
||||
pub enum IndexType {
|
||||
Scalar,
|
||||
IvfPq,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub enum MetricType {
|
||||
L2,
|
||||
Cosine,
|
||||
Dot,
|
||||
}
|
||||
|
||||
impl From<MetricType> for LanceMetricType {
|
||||
fn from(metric: MetricType) -> Self {
|
||||
match metric {
|
||||
MetricType::L2 => Self::L2,
|
||||
MetricType::Cosine => Self::Cosine,
|
||||
MetricType::Dot => Self::Dot,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub struct IndexBuilder {
|
||||
inner: vectordb::index::IndexBuilder,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
impl IndexBuilder {
|
||||
pub fn new(tbl: &dyn vectordb::Table) -> Self {
|
||||
let inner = tbl.create_index(&[]);
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn replace(&mut self, v: bool) {
|
||||
self.inner.replace(v);
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn column(&mut self, c: String) {
|
||||
self.inner.columns(&[c.as_str()]);
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn name(&mut self, name: String) {
|
||||
self.inner.name(name.as_str());
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn ivf_pq(
|
||||
&mut self,
|
||||
metric_type: Option<MetricType>,
|
||||
num_partitions: Option<u32>,
|
||||
num_sub_vectors: Option<u32>,
|
||||
num_bits: Option<u32>,
|
||||
max_iterations: Option<u32>,
|
||||
sample_rate: Option<u32>,
|
||||
) {
|
||||
self.inner.ivf_pq();
|
||||
metric_type.map(|m| self.inner.metric_type(m.into()));
|
||||
num_partitions.map(|p| self.inner.num_partitions(p));
|
||||
num_sub_vectors.map(|s| self.inner.num_sub_vectors(s));
|
||||
num_bits.map(|b| self.inner.num_bits(b));
|
||||
max_iterations.map(|i| self.inner.max_iterations(i));
|
||||
sample_rate.map(|s| self.inner.sample_rate(s));
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn scalar(&mut self) {
|
||||
self.inner.scalar();
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn build(&self) -> napi::Result<()> {
|
||||
println!("nodejs::index.rs : build");
|
||||
self.inner
|
||||
.build()
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to build index: {}", e)))?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
47
nodejs/src/lib.rs
Normal file
47
nodejs/src/lib.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use connection::Connection;
|
||||
use napi_derive::*;
|
||||
|
||||
mod connection;
|
||||
mod index;
|
||||
mod query;
|
||||
mod table;
|
||||
|
||||
#[napi(object)]
|
||||
pub struct ConnectionOptions {
|
||||
pub uri: String,
|
||||
pub api_key: Option<String>,
|
||||
pub host_override: Option<String>,
|
||||
}
|
||||
|
||||
/// Write mode for writing a table.
|
||||
#[napi(string_enum)]
|
||||
pub enum WriteMode {
|
||||
Create,
|
||||
Append,
|
||||
Overwrite,
|
||||
}
|
||||
|
||||
/// Write options when creating a Table.
|
||||
#[napi(object)]
|
||||
pub struct WriteOptions {
|
||||
pub mode: Option<WriteMode>,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn connect(options: ConnectionOptions) -> napi::Result<Connection> {
|
||||
Connection::new(options.uri.clone()).await
|
||||
}
|
||||
48
nodejs/src/query.rs
Normal file
48
nodejs/src/query.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
use vectordb::query::Query as LanceDBQuery;
|
||||
|
||||
use crate::table::Table;
|
||||
|
||||
#[napi]
|
||||
pub struct Query {
|
||||
inner: LanceDBQuery,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
impl Query {
|
||||
pub fn new(table: &Table) -> Self {
|
||||
Self {
|
||||
inner: table.table.query(),
|
||||
}
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn vector(&mut self, vector: Float32Array) {
|
||||
let inn = self.inner.clone().nearest_to(&vector);
|
||||
self.inner = inn;
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn to_arrow(&self) -> napi::Result<()> {
|
||||
// let buf = self.inner.to_arrow().map_err(|e| {
|
||||
// napi::Error::from_reason(format!("Failed to convert query to arrow: {}", e))
|
||||
// })?;
|
||||
// Ok(buf)
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
88
nodejs/src/table.rs
Normal file
88
nodejs/src/table.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow_ipc::writer::FileWriter;
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
use vectordb::{ipc::ipc_file_to_batches, table::TableRef};
|
||||
|
||||
use crate::index::IndexBuilder;
|
||||
use crate::query::Query;
|
||||
|
||||
#[napi]
|
||||
pub struct Table {
|
||||
pub(crate) table: TableRef,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
impl Table {
|
||||
pub(crate) fn new(table: TableRef) -> Self {
|
||||
Self { table }
|
||||
}
|
||||
|
||||
/// Return Schema as empty Arrow IPC file.
|
||||
#[napi]
|
||||
pub fn schema(&self) -> napi::Result<Buffer> {
|
||||
let mut writer = FileWriter::try_new(vec![], &self.table.schema())
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to create IPC file: {}", e)))?;
|
||||
writer
|
||||
.finish()
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to finish IPC file: {}", e)))?;
|
||||
Ok(Buffer::from(writer.into_inner().map_err(|e| {
|
||||
napi::Error::from_reason(format!("Failed to get IPC file: {}", e))
|
||||
})?))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn add(&self, buf: Buffer) -> napi::Result<()> {
|
||||
let batches = ipc_file_to_batches(buf.to_vec())
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
|
||||
self.table.add(Box::new(batches), None).await.map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to add batches to table {}: {}",
|
||||
self.table, e
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn count_rows(&self) -> napi::Result<usize> {
|
||||
self.table.count_rows().await.map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to count rows in table {}: {}",
|
||||
self.table, e
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn delete(&self, predicate: String) -> napi::Result<()> {
|
||||
self.table.delete(&predicate).await.map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to delete rows in table {}: predicate={}",
|
||||
self.table, e
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn create_index(&self) -> IndexBuilder {
|
||||
IndexBuilder::new(self.table.as_ref())
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn query(&self) -> Query {
|
||||
Query::new(self)
|
||||
}
|
||||
}
|
||||
31
nodejs/tsconfig.json
Normal file
31
nodejs/tsconfig.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"include": [
|
||||
"vectordb/*.ts",
|
||||
"vectordb/**/*.ts",
|
||||
"vectordb/*.js",
|
||||
],
|
||||
"compilerOptions": {
|
||||
"target": "es2022",
|
||||
"module": "commonjs",
|
||||
"declaration": true,
|
||||
"outDir": "./dist",
|
||||
"strict": true,
|
||||
"allowJs": true,
|
||||
"resolveJsonModule": true,
|
||||
},
|
||||
"exclude": [
|
||||
"./dist/*",
|
||||
],
|
||||
"typedocOptions": {
|
||||
"entryPoints": [
|
||||
"vectordb/index.ts"
|
||||
],
|
||||
"out": "../docs/src/javascript/",
|
||||
"visibilityFilters": {
|
||||
"protected": false,
|
||||
"private": false,
|
||||
"inherited": true,
|
||||
"external": false,
|
||||
}
|
||||
}
|
||||
}
|
||||
183
nodejs/vectordb/arrow.ts
Normal file
183
nodejs/vectordb/arrow.ts
Normal file
@@ -0,0 +1,183 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import {
|
||||
Field,
|
||||
FixedSizeList,
|
||||
Float,
|
||||
Float32,
|
||||
Schema,
|
||||
Table as ArrowTable,
|
||||
Table,
|
||||
Vector,
|
||||
vectorFromArray,
|
||||
tableToIPC,
|
||||
} from "apache-arrow";
|
||||
|
||||
/** Data type accepted by NodeJS SDK */
|
||||
export type Data = Record<string, unknown>[] | ArrowTable;
|
||||
|
||||
/** Per-column settings used by `makeArrowTable` when it builds a vector column. */
export class VectorColumnOptions {
|
||||
/** Arrow float type of the vector's items. Defaults to `Float32`. */
|
||||
type: Float = new Float32();
|
||||
|
||||
/**
 * @param values optional overrides; every property present on `values` is
 * copied onto this instance, replacing the defaults above.
 */
constructor(values?: Partial<VectorColumnOptions>) {
|
||||
Object.assign(this, values);
|
||||
}
|
||||
}
|
||||
|
||||
/** Options to control the makeArrowTable call. */
|
||||
export class MakeArrowTableOptions {
|
||||
/** Explicit schema. When set, `makeArrowTable` takes each column's type from it. */
|
||||
schema?: Schema;
|
||||
|
||||
/**
 * Vector columns, keyed by column name.
 *
 * By default a single column named `vector` is treated as a vector column.
 */
|
||||
vectorColumns: Record<string, VectorColumnOptions> = {
|
||||
vector: new VectorColumnOptions(),
|
||||
};
|
||||
|
||||
/**
 * @param values optional overrides; every property present on `values` is
 * copied onto this instance, so a supplied `vectorColumns` replaces the
 * default map as a whole rather than being merged into it.
 */
constructor(values?: Partial<MakeArrowTableOptions>) {
|
||||
Object.assign(this, values);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * An enhanced version of the {@link makeTable} function from Apache Arrow
 * that supports nested fields and embeddings columns.
 *
 * Note that it currently does not support nulls.
 *
 * The column names are taken from the keys of the first record only.
 *
 * @param data input data, must contain at least one record
 * @param options options to control the makeArrowTable call.
 * @returns an Arrow table with one column per key of the first record
 * @throws Error if `data` is empty, or if a configured vector column does not
 *   hold an array-like value in the first record
 *
 * @example
 *
 * ```ts
 *
 * import { makeArrowTable } from "../arrow";
 * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
 *
 * const schema = new Schema([
 *   new Field("a", new Int32()),
 *   new Field("b", new Float32()),
 *   new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
 * ]);
 * const table = makeArrowTable([
 *   { a: 1, b: 2, c: [1, 2, 3] },
 *   { a: 4, b: 5, c: [4, 5, 6] },
 *   { a: 7, b: 8, c: [7, 8, 9] },
 * ], { schema });
 * ```
 *
 * It guesses the vector columns if the schema is not provided. For example,
 * by default it assumes that the column named `vector` is a vector column.
 *
 * ```ts
 *
 * const schema = new Schema([
 *   new Field("a", new Float64()),
 *   new Field("b", new Float64()),
 *   new Field(
 *     "vector",
 *     new FixedSizeList(3, new Field("item", new Float32()))
 *   ),
 * ]);
 * const table = makeArrowTable([
 *   { a: 1, b: 2, vector: [1, 2, 3] },
 *   { a: 4, b: 5, vector: [4, 5, 6] },
 *   { a: 7, b: 8, vector: [7, 8, 9] },
 * ]);
 * assert.deepEqual(table.schema, schema);
 * ```
 *
 * You can specify the vector column types and names using the options as well
 *
 * ```typescript
 *
 * const schema = new Schema([
 *   new Field('a', new Float64()),
 *   new Field('b', new Float64()),
 *   new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
 *   new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
 * ]);
 * const table = makeArrowTable([
 *   { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
 *   { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
 *   { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
 * ], {
 *   vectorColumns: {
 *     vec1: { type: new Float16() },
 *     vec2: { type: new Float16() }
 *   }
 * });
 * assert.deepEqual(table.schema, schema)
 * ```
 */
export function makeArrowTable(
  data: Record<string, any>[],
  options?: Partial<MakeArrowTableOptions>
): Table {
  if (data.length === 0) {
    throw new Error("At least one record needs to be provided");
  }
  const opt = new MakeArrowTableOptions(options ?? {});
  const columns: Record<string, Vector> = {};
  // TODO: sample dataset to find missing columns
  const columnNames = Object.keys(data[0]);
  for (const colName of columnNames) {
    // eslint-disable-next-line @typescript-eslint/no-unsafe-return
    const values = data.map((datum) => datum[colName]);
    let vector: Vector;

    if (opt.schema !== undefined) {
      // Explicit schema is provided, highest priority. A column that is missing
      // from the schema yields `undefined`, leaving the type to be inferred.
      vector = vectorFromArray(
        values,
        opt.schema.fields.find((f) => f.name === colName)?.type
      );
    } else {
      const vectorColumnOptions = opt.vectorColumns[colName];
      if (vectorColumnOptions !== undefined) {
        // The list size is taken from the first record only.
        const first = values[0] as any[];
        if (typeof first?.length !== "number") {
          throw new Error(
            `Vector column '${colName}' must hold an array in the first record`
          );
        }
        const fslType = new FixedSizeList(
          first.length,
          new Field("item", vectorColumnOptions.type, false)
        );
        vector = vectorFromArray(values, fslType);
      } else {
        // Normal case
        vector = vectorFromArray(values);
      }
    }
    columns[colName] = vector;
  }

  return new Table(columns);
}
|
||||
|
||||
/**
 * Serialize table data into a Node.js Buffer using Arrow's `tableToIPC`.
 *
 * Plain records are first converted with `makeArrowTable`. An input that is
 * already an Arrow Table is serialized as-is, and `schema` is not consulted
 * in that case.
 *
 * @param data Arrow Table, or an array of records
 * @param schema Arrow Schema, optional; only used when converting records
 * @returns a Buffer wrapping the bytes produced by `tableToIPC`
 */
export function toBuffer(data: Data, schema?: Schema): Buffer {
  const arrowTable: Table =
    data instanceof Table ? data : makeArrowTable(data, { schema });
  const ipcBytes = tableToIPC(arrowTable);
  return Buffer.from(ipcBytes);
}
|
||||
70
nodejs/vectordb/connection.ts
Normal file
70
nodejs/vectordb/connection.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import { toBuffer } from "./arrow";
|
||||
import { Connection as _NativeConnection } from "./native";
|
||||
import { Table } from "./table";
|
||||
import { Table as ArrowTable } from "apache-arrow";
|
||||
|
||||
/**
|
||||
* A LanceDB Connection that allows you to open tables and create new ones.
|
||||
*
|
||||
* Connection could be local against filesystem or remote against a server.
|
||||
*/
|
||||
export class Connection {
  /** Native connection handle; every method below delegates to it. */
  readonly inner: _NativeConnection;

  /** Wrap an already-created native connection. */
  constructor(inner: _NativeConnection) {
    this.inner = inner;
  }

  /** List all the table names in this database. */
  async tableNames(): Promise<string[]> {
    return this.inner.tableNames();
  }

  /**
   * Open a table in the database.
   *
   * @param name The name of the table.
   * @returns the opened table, wrapped in the SDK's {@link Table} class
   */
  async openTable(name: string): Promise<Table> {
    return new Table(await this.inner.openTable(name));
  }

  /**
   * Creates a new Table and initializes it with the given data.
   *
   * The data is serialized with `toBuffer` before being handed to the native
   * connection.
   *
   * @param {string} name - The name of the table.
   * @param data - Non-empty Array of Records, or an Arrow Table, to be
   *   inserted into the table
   * @returns the newly created table
   */
  async createTable(
    name: string,
    data: Record<string, unknown>[] | ArrowTable
  ): Promise<Table> {
    const serialized = toBuffer(data);
    return new Table(await this.inner.createTable(name, serialized));
  }

  /**
   * Drop an existing table.
   *
   * @param name The name of the table to drop.
   */
  async dropTable(name: string): Promise<void> {
    return this.inner.dropTable(name);
  }
}
|
||||
64
nodejs/vectordb/index.ts
Normal file
64
nodejs/vectordb/index.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import { Connection } from "./connection";
|
||||
import { Connection as NativeConnection, ConnectionOptions } from "./native.js";
|
||||
|
||||
export {
|
||||
ConnectionOptions,
|
||||
WriteOptions,
|
||||
Query,
|
||||
MetricType,
|
||||
} from "./native.js";
|
||||
export { Connection } from "./connection";
|
||||
export { Table } from "./table";
|
||||
export { Data } from "./arrow";
|
||||
export { IvfPQOptions, IndexBuilder } from "./indexer";
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB instance at the given URI.
|
||||
*
|
||||
* Accepted formats:
|
||||
*
|
||||
* - `/path/to/database` - local database
|
||||
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
||||
* - `db://host:port` - remote database (LanceDB cloud)
|
||||
*
|
||||
* @param uri The uri of the database. If the database uri starts with `db://` then it connects to a remote database.
|
||||
*
|
||||
* @see {@link ConnectionOptions} for more details on the URI format.
|
||||
*/
|
||||
export async function connect(uri: string): Promise<Connection>;
export async function connect(
  opts: Partial<ConnectionOptions>
): Promise<Connection>;
export async function connect(
  args: string | Partial<ConnectionOptions>
): Promise<Connection> {
  // Normalize both call styles into one options object: a bare string is the
  // URI, while an options object is layered over empty-string defaults.
  const opts: ConnectionOptions =
    typeof args === "string"
      ? { uri: args }
      : Object.assign(
          {
            uri: "",
            apiKey: "",
            hostOverride: "",
          },
          args
        );
  // NOTE(review): only `opts.uri` is forwarded to the native layer here;
  // `apiKey` and `hostOverride` are collected but not used by this function.
  return new Connection(await NativeConnection.new(opts.uri));
}
|
||||
102
nodejs/vectordb/indexer.ts
Normal file
102
nodejs/vectordb/indexer.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import {
|
||||
MetricType,
|
||||
IndexBuilder as NativeBuilder,
|
||||
Table as NativeTable,
|
||||
} from "./native";
|
||||
|
||||
/** Options to create `IVF_PQ` index */
|
||||
export interface IvfPQOptions {
|
||||
/** Number of IVF partitions. */
|
||||
num_partitions?: number;
|
||||
|
||||
/** Number of sub-vectors in PQ coding. */
|
||||
num_sub_vectors?: number;
|
||||
|
||||
/** Number of bits used for each PQ code.
|
||||
*/
|
||||
num_bits?: number;
|
||||
|
||||
/** Metric type to calculate the distance between vectors.
|
||||
*
|
||||
* Supported metrics: `L2`, `Cosine` and `Dot`.
|
||||
*/
|
||||
metric_type?: MetricType;
|
||||
|
||||
/** Number of iterations to train K-means.
|
||||
*
|
||||
* Default is 50. More iterations usually yield better results,
|
||||
* but take longer to train.
|
||||
*/
|
||||
max_iterations?: number;
|
||||
|
||||
/** Passed to the native builder as `sampleRate`.
 *
 * NOTE(review): presumably the sampling rate used when training the index;
 * confirm against the native implementation.
 */
sample_rate?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Building an index on LanceDB {@link Table}
|
||||
*
|
||||
* @see {@link Table.createIndex} for detailed usage.
|
||||
*/
|
||||
export class IndexBuilder {
  /** Native builder that does the real work; every method forwards to it. */
  private inner: NativeBuilder;

  /** Obtain a native builder for `tbl` by calling `tbl.createIndex()`. */
  constructor(tbl: NativeTable) {
    this.inner = tbl.createIndex();
  }

  /**
   * Instruct the builder to build an `IVF_PQ` index.
   *
   * @param options optional `IVF_PQ` parameters; omitted values are passed to
   *   the native builder as `undefined`
   * @returns this builder, for chaining
   */
  ivf_pq(options?: IvfPQOptions): IndexBuilder {
    this.inner.ivfPq(
      options?.metric_type,
      options?.num_partitions,
      options?.num_sub_vectors,
      options?.num_bits,
      options?.max_iterations,
      options?.sample_rate
    );
    return this;
  }

  /**
   * Instruct the builder to build a Scalar index.
   *
   * @returns this builder, for chaining
   */
  scalar(): IndexBuilder {
    // Forward to the native builder. Calling `this.scalar()` here would
    // recurse without end and overflow the stack.
    this.inner.scalar();
    return this;
  }

  /**
   * Set the column(s) to create index on top of.
   *
   * @returns this builder, for chaining
   */
  column(col: string): IndexBuilder {
    this.inner.column(col);
    return this;
  }

  /**
   * Set to true to replace existing index.
   *
   * @returns this builder, for chaining
   */
  replace(val: boolean): IndexBuilder {
    this.inner.replace(val);
    return this;
  }

  /**
   * Specify the name of the index. Optional.
   *
   * @returns this builder, for chaining
   */
  name(n: string): IndexBuilder {
    this.inner.name(n);
    return this;
  }

  /** Build the index; resolves once the native build has finished. */
  async build(): Promise<void> {
    await this.inner.build();
  }
}
|
||||
69
nodejs/vectordb/native.d.ts
vendored
Normal file
69
nodejs/vectordb/native.d.ts
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
/* tslint:disable */
|
||||
/* eslint-disable */
|
||||
|
||||
/* auto-generated by NAPI-RS */
|
||||
|
||||
export const enum IndexType {
|
||||
Scalar = 0,
|
||||
IvfPq = 1
|
||||
}
|
||||
export const enum MetricType {
|
||||
L2 = 0,
|
||||
Cosine = 1,
|
||||
Dot = 2
|
||||
}
|
||||
export interface ConnectionOptions {
|
||||
uri: string
|
||||
apiKey?: string
|
||||
hostOverride?: string
|
||||
}
|
||||
/** Write mode for writing a table. */
|
||||
export const enum WriteMode {
|
||||
Create = 'Create',
|
||||
Append = 'Append',
|
||||
Overwrite = 'Overwrite'
|
||||
}
|
||||
/** Write options when creating a Table. */
|
||||
export interface WriteOptions {
|
||||
mode?: WriteMode
|
||||
}
|
||||
export function connect(options: ConnectionOptions): Promise<Connection>
|
||||
export class Connection {
|
||||
/** Create a new Connection instance from the given URI. */
|
||||
static new(uri: string): Promise<Connection>
|
||||
/** List all tables in the dataset. */
|
||||
tableNames(): Promise<Array<string>>
|
||||
/**
|
||||
* Create table from a Apache Arrow IPC (file) buffer.
|
||||
*
|
||||
* Parameters:
|
||||
* - name: The name of the table.
|
||||
* - buf: The buffer containing the IPC file.
|
||||
*
|
||||
*/
|
||||
createTable(name: string, buf: Buffer): Promise<Table>
|
||||
openTable(name: string): Promise<Table>
|
||||
/** Drop table with the name. Or raise an error if the table does not exist. */
|
||||
dropTable(name: string): Promise<void>
|
||||
}
|
||||
export class IndexBuilder {
|
||||
replace(v: boolean): void
|
||||
column(c: string): void
|
||||
name(name: string): void
|
||||
ivfPq(metricType?: MetricType | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, numBits?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): void
|
||||
scalar(): void
|
||||
build(): Promise<void>
|
||||
}
|
||||
export class Query {
|
||||
vector(vector: Float32Array): void
|
||||
toArrow(): void
|
||||
}
|
||||
export class Table {
|
||||
/** Return Schema as empty Arrow IPC file. */
|
||||
schema(): Buffer
|
||||
add(buf: Buffer): Promise<void>
|
||||
countRows(): Promise<bigint>
|
||||
delete(predicate: string): Promise<void>
|
||||
createIndex(): IndexBuilder
|
||||
query(): Query
|
||||
}
|
||||
307
nodejs/vectordb/native.js
Normal file
307
nodejs/vectordb/native.js
Normal file
@@ -0,0 +1,307 @@
|
||||
/* tslint:disable */
|
||||
/* eslint-disable */
|
||||
/* prettier-ignore */
|
||||
|
||||
/* auto-generated by NAPI-RS */
|
||||
|
||||
const { existsSync, readFileSync } = require('fs')
|
||||
const { join } = require('path')
|
||||
|
||||
const { platform, arch } = process
|
||||
|
||||
let nativeBinding = null
|
||||
let localFileExisted = false
|
||||
let loadError = null
|
||||
|
||||
function isMusl() {
|
||||
// For Node 10
|
||||
if (!process.report || typeof process.report.getReport !== 'function') {
|
||||
try {
|
||||
const lddPath = require('child_process').execSync('which ldd').toString().trim()
|
||||
return readFileSync(lddPath, 'utf8').includes('musl')
|
||||
} catch (e) {
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
const { glibcVersionRuntime } = process.report.getReport().header
|
||||
return !glibcVersionRuntime
|
||||
}
|
||||
}
|
||||
|
||||
switch (platform) {
|
||||
case 'android':
|
||||
switch (arch) {
|
||||
case 'arm64':
|
||||
localFileExisted = existsSync(join(__dirname, 'vectordb-nodejs.android-arm64.node'))
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.android-arm64.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-android-arm64')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
break
|
||||
case 'arm':
|
||||
localFileExisted = existsSync(join(__dirname, 'vectordb-nodejs.android-arm-eabi.node'))
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.android-arm-eabi.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-android-arm-eabi')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
break
|
||||
default:
|
||||
throw new Error(`Unsupported architecture on Android ${arch}`)
|
||||
}
|
||||
break
|
||||
case 'win32':
|
||||
switch (arch) {
|
||||
case 'x64':
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.win32-x64-msvc.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.win32-x64-msvc.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-win32-x64-msvc')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
break
|
||||
case 'ia32':
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.win32-ia32-msvc.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.win32-ia32-msvc.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-win32-ia32-msvc')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
break
|
||||
case 'arm64':
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.win32-arm64-msvc.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.win32-arm64-msvc.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-win32-arm64-msvc')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
break
|
||||
default:
|
||||
throw new Error(`Unsupported architecture on Windows: ${arch}`)
|
||||
}
|
||||
break
|
||||
case 'darwin':
|
||||
localFileExisted = existsSync(join(__dirname, 'vectordb-nodejs.darwin-universal.node'))
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.darwin-universal.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-darwin-universal')
|
||||
}
|
||||
break
|
||||
} catch {}
|
||||
switch (arch) {
|
||||
case 'x64':
|
||||
localFileExisted = existsSync(join(__dirname, 'vectordb-nodejs.darwin-x64.node'))
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.darwin-x64.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-darwin-x64')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
break
|
||||
case 'arm64':
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.darwin-arm64.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.darwin-arm64.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-darwin-arm64')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
break
|
||||
default:
|
||||
throw new Error(`Unsupported architecture on macOS: ${arch}`)
|
||||
}
|
||||
break
|
||||
case 'freebsd':
|
||||
if (arch !== 'x64') {
|
||||
throw new Error(`Unsupported architecture on FreeBSD: ${arch}`)
|
||||
}
|
||||
localFileExisted = existsSync(join(__dirname, 'vectordb-nodejs.freebsd-x64.node'))
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.freebsd-x64.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-freebsd-x64')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
break
|
||||
case 'linux':
|
||||
switch (arch) {
|
||||
case 'x64':
|
||||
if (isMusl()) {
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.linux-x64-musl.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.linux-x64-musl.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-linux-x64-musl')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
} else {
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.linux-x64-gnu.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.linux-x64-gnu.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-linux-x64-gnu')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
}
|
||||
break
|
||||
case 'arm64':
|
||||
if (isMusl()) {
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.linux-arm64-musl.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.linux-arm64-musl.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-linux-arm64-musl')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
} else {
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.linux-arm64-gnu.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.linux-arm64-gnu.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-linux-arm64-gnu')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
}
|
||||
break
|
||||
case 'arm':
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.linux-arm-gnueabihf.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.linux-arm-gnueabihf.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-linux-arm-gnueabihf')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
break
|
||||
case 'riscv64':
|
||||
if (isMusl()) {
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.linux-riscv64-musl.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.linux-riscv64-musl.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-linux-riscv64-musl')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
} else {
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.linux-riscv64-gnu.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.linux-riscv64-gnu.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-linux-riscv64-gnu')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
}
|
||||
break
|
||||
case 's390x':
|
||||
localFileExisted = existsSync(
|
||||
join(__dirname, 'vectordb-nodejs.linux-s390x-gnu.node')
|
||||
)
|
||||
try {
|
||||
if (localFileExisted) {
|
||||
nativeBinding = require('./vectordb-nodejs.linux-s390x-gnu.node')
|
||||
} else {
|
||||
nativeBinding = require('vectordb-linux-s390x-gnu')
|
||||
}
|
||||
} catch (e) {
|
||||
loadError = e
|
||||
}
|
||||
break
|
||||
default:
|
||||
throw new Error(`Unsupported architecture on Linux: ${arch}`)
|
||||
}
|
||||
break
|
||||
default:
|
||||
throw new Error(`Unsupported OS: ${platform}, architecture: ${arch}`)
|
||||
}
|
||||
|
||||
if (!nativeBinding) {
|
||||
if (loadError) {
|
||||
throw loadError
|
||||
}
|
||||
throw new Error(`Failed to load native binding`)
|
||||
}
|
||||
|
||||
const { Connection, IndexType, MetricType, IndexBuilder, Query, Table, WriteMode, connect } = nativeBinding
|
||||
|
||||
module.exports.Connection = Connection
|
||||
module.exports.IndexType = IndexType
|
||||
module.exports.MetricType = MetricType
|
||||
module.exports.IndexBuilder = IndexBuilder
|
||||
module.exports.Query = Query
|
||||
module.exports.Table = Table
|
||||
module.exports.WriteMode = WriteMode
|
||||
module.exports.connect = connect
|
||||
93
nodejs/vectordb/query.ts
Normal file
93
nodejs/vectordb/query.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import { RecordBatch } from "apache-arrow";
|
||||
import { Table } from "./table";
|
||||
|
||||
// TODO: re-enable eslint once we have a real implementation
|
||||
/* eslint-disable */
|
||||
/**
 * Placeholder async iterator over record batches.
 *
 * None of the iterator methods is implemented yet: each one throws
 * `Error("Method not implemented.")` when called.
 */
class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
||||
/** Not implemented yet; always throws. */
next(
|
||||
...args: [] | [undefined]
|
||||
): Promise<IteratorResult<RecordBatch<any>, any>> {
|
||||
throw new Error("Method not implemented.");
|
||||
}
|
||||
/** Not implemented yet; always throws. */
return?(value?: any): Promise<IteratorResult<RecordBatch<any>, any>> {
|
||||
throw new Error("Method not implemented.");
|
||||
}
|
||||
/** Not implemented yet; always throws. */
throw?(e?: any): Promise<IteratorResult<RecordBatch<any>, any>> {
|
||||
throw new Error("Method not implemented.");
|
||||
}
|
||||
}
|
||||
/* eslint-enable */
|
||||
|
||||
/** Query executor */
|
||||
export class Query implements AsyncIterable<RecordBatch> {
  /** Table this query was created for. */
  private readonly tbl: Table;
  /** Filter predicate set through `filter()`, if any. */
  private _filter?: string;
  /** Row limit set through `limit()`, if any. */
  private _limit?: number;

  // Vector search
  /** Query vector set through `vector()`, stored as 32-bit floats. */
  private _vector?: Float32Array;
  /** Number of probes set through `nprobes()`, if any. */
  private _nprobes?: number;
  /** Refine factor set through `refine_factor()`; starts at 1. */
  private _refine_factor?: number = 1;

  /** Create a query bound to `tbl`. */
  constructor(tbl: Table) {
    this.tbl = tbl;
  }

  /**
   * Set the filter predicate, only returns the results that satisfy the filter.
   *
   * @returns this query, for chaining
   */
  filter(predicate: string): Query {
    this._filter = predicate;
    return this;
  }

  /**
   * Set the limit of rows to return.
   *
   * @returns this query, for chaining
   */
  limit(limit: number): Query {
    this._limit = limit;
    return this;
  }

  /**
   * Set the query vector. The values are copied into a `Float32Array`.
   *
   * @returns this query, for chaining
   */
  vector(vector: number[]): Query {
    this._vector = Float32Array.from(vector);
    return this;
  }

  /**
   * Set the number of probes to use for the query.
   *
   * @returns this query, for chaining
   */
  nprobes(nprobes: number): Query {
    this._nprobes = nprobes;
    return this;
  }

  /**
   * Set the refine factor for the query.
   *
   * @returns this query, for chaining
   */
  refine_factor(refine_factor: number): Query {
    this._refine_factor = refine_factor;
    return this;
  }

  /**
   * Return the iterator used by `for await` loops over this query.
   *
   * The iterator must be returned, not thrown: throwing it made every
   * `for await` fail with a non-Error object before iteration could start.
   */
  [Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined> {
    return new RecordBatchIterator();
  }
}
|
||||
105
nodejs/vectordb/table.ts
Normal file
105
nodejs/vectordb/table.ts
Normal file
@@ -0,0 +1,105 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import { Schema, tableFromIPC } from "apache-arrow";
|
||||
import { Table as _NativeTable } from "./native";
|
||||
import { toBuffer, Data } from "./arrow";
|
||||
import { Query } from "./query";
|
||||
import { IndexBuilder } from "./indexer";
|
||||
|
||||
/**
|
||||
* A LanceDB Table is the collection of Records.
|
||||
*
|
||||
* Each Record has one or more vector fields.
|
||||
*/
|
||||
export class Table {
  /** Native table handle; every method below delegates to it. */
  private readonly inner: _NativeTable;

  /** Construct a Table. Internal use only. */
  constructor(inner: _NativeTable) {
    this.inner = inner;
  }

  /**
   * Get the schema of the table.
   *
   * The native table returns the schema as an Arrow IPC buffer, which is
   * decoded here with `tableFromIPC`.
   */
  get schema(): Schema {
    return tableFromIPC(this.inner.schema()).schema;
  }

  /**
   * Insert records into this Table.
   *
   * @param {Data} data Records to be inserted into the Table
   * @return A promise that resolves, without a value, once the native table
   *   has accepted the data
   */
  async add(data: Data): Promise<void> {
    await this.inner.add(toBuffer(data));
  }

  /** Count the total number of rows in the dataset. */
  async countRows(): Promise<bigint> {
    return await this.inner.countRows();
  }

  /** Delete the rows that satisfy the predicate. */
  async delete(predicate: string): Promise<void> {
    await this.inner.delete(predicate);
  }

  /** Create an index over the columns.
   *
   * @param {string} column The column to create the index on. If not specified,
   *                        it will create an index on vector field.
   * @returns a builder; nothing is built until its `build()` is awaited
   *
   * @example
   *
   * By default, it creates vector index on one vector column.
   *
   * ```typescript
   * const table = await conn.openTable("my_table");
   * await table.createIndex().build();
   * ```
   *
   * You can specify `IVF_PQ` parameters via `ivf_pq({})` call.
   * ```typescript
   * const table = await conn.openTable("my_table");
   * await table.createIndex("my_vec_col")
   *   .ivf_pq({ num_partitions: 128, num_sub_vectors: 16 })
   *   .build();
   * ```
   *
   * Or create a Scalar index
   *
   * ```typescript
   * await table.createIndex("my_float_col").build();
   * ```
   */
  createIndex(column?: string): IndexBuilder {
    const builder = new IndexBuilder(this.inner);
    if (column === undefined) {
      return builder;
    }
    return builder.column(column);
  }

  /**
   * Start a query on this table.
   *
   * @param vector optional query vector; when given it is set on the query
   * @returns the new query
   */
  search(vector?: number[]): Query {
    const query = new Query(this);
    return vector === undefined ? query : query.vector(vector);
  }
}
|
||||
@@ -87,16 +87,25 @@ class DBConnection(EnforceOverrides):
|
||||
Can be either "create" or "overwrite".
|
||||
By default, if the table already exists, an exception is raised.
|
||||
If you want to overwrite the table, use mode="overwrite".
|
||||
|
||||
**Note: this parameter is not yet supported on LanceDB Cloud**
|
||||
exist_ok: bool, default False
|
||||
If a table by the same name already exists, then raise an exception
|
||||
if exist_ok=False. If exist_ok=True, then open the existing table;
|
||||
it will not add the provided data but will validate against any
|
||||
schema that's specified.
|
||||
|
||||
**Note: this parameter is not yet supported on LanceDB Cloud**
|
||||
on_bad_vectors: str, default "error"
|
||||
What to do if any of the vectors are not the same size or contains NaNs.
|
||||
One of "error", "drop", "fill".
|
||||
fill_value: float
|
||||
The value to use when filling vectors. Only used if on_bad_vectors="fill".
|
||||
embedding_functions: List[EmbeddingFunctionConfig], default None
|
||||
The embedding functions to be applied before inserting data to
|
||||
LanceDB table.
|
||||
|
||||
**Note: this parameter is not yet supported on LanceDB Cloud**
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -230,7 +239,9 @@ class DBConnection(EnforceOverrides):
|
||||
def drop_database(self):
|
||||
"""
|
||||
Drop database
|
||||
This is the same thing as dropping all the tables
|
||||
This functions the same as dropping all the tables
|
||||
|
||||
**Note: this API is not yet available on LanceDB Cloud**
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@@ -54,16 +54,16 @@ class RemoteTable(Table):
|
||||
|
||||
def to_arrow(self) -> pa.Table:
|
||||
"""to_arrow() is not supported on the LanceDB cloud"""
|
||||
raise NotImplementedError("to_arrow() is not supported on the LanceDB cloud")
|
||||
raise NotImplementedError("to_arrow() is not yet supported on LanceDB Cloud")
|
||||
|
||||
def to_pandas(self):
|
||||
"""to_pandas() is not supported on the LanceDB cloud"""
|
||||
return NotImplementedError("to_pandas() is not supported on the LanceDB cloud")
|
||||
return NotImplementedError("to_pandas() is not yet supported on LanceDB Cloud")
|
||||
|
||||
def create_scalar_index(self, *args, **kwargs):
|
||||
"""Creates a scalar index"""
|
||||
return NotImplementedError(
|
||||
"create_scalar_index() is not supported on the LanceDB cloud"
|
||||
"create_scalar_index() is not yet supported on LanceDB Cloud"
|
||||
)
|
||||
|
||||
def create_index(
|
||||
|
||||
@@ -178,6 +178,7 @@ class Table(ABC):
|
||||
def to_pandas(self) -> "pd.DataFrame":
|
||||
"""Return the table as a pandas DataFrame.
|
||||
|
||||
**Note: this API is not yet available on LanceDB Cloud**
|
||||
Returns
|
||||
-------
|
||||
pd.DataFrame
|
||||
@@ -188,6 +189,7 @@ class Table(ABC):
|
||||
def to_arrow(self) -> pa.Table:
|
||||
"""Return the table as a pyarrow Table.
|
||||
|
||||
**Note: this API is not yet available on LanceDB Cloud**
|
||||
Returns
|
||||
-------
|
||||
pa.Table
|
||||
@@ -215,18 +217,26 @@ class Table(ABC):
|
||||
num_partitions: int, default 256
|
||||
The number of IVF partitions to use when creating the index.
|
||||
Default is 256.
|
||||
|
||||
**Note: this parameter is not supported on LanceDB Cloud**
|
||||
num_sub_vectors: int, default 96
|
||||
The number of PQ sub-vectors to use when creating the index.
|
||||
Default is 96.
|
||||
|
||||
**Note: this parameter is not supported on LanceDB Cloud**
|
||||
vector_column_name: str, default "vector"
|
||||
The vector column name to create the index.
|
||||
replace: bool, default True
|
||||
- If True, replace the existing index if it exists.
|
||||
|
||||
- If False, raise an error if duplicate index exists.
|
||||
|
||||
**Note: this parameter is not yet supported on LanceDB Cloud**
|
||||
accelerator: str, default None
|
||||
If set, use the given accelerator to create the index.
|
||||
Only support "cuda" for now.
|
||||
|
||||
**Note: this parameter is not yet supported on LanceDB Cloud**
|
||||
index_cache_size : int, optional
|
||||
The size of the index cache in number of entries. Default value is 256.
|
||||
"""
|
||||
@@ -241,6 +251,8 @@ class Table(ABC):
|
||||
):
|
||||
"""Create a scalar index on a column.
|
||||
|
||||
**Note: this API is not yet available on LanceDB Cloud**
|
||||
|
||||
Scalar indices, like vector indices, can be used to speed up scans. A scalar
|
||||
index can speed up scans that contain filter expressions on the indexed column.
|
||||
For example, the following scan will be faster if the column ``my_col`` has
|
||||
@@ -396,6 +408,8 @@ class Table(ABC):
|
||||
- If `query` is a string, then the query type is "vector" if the
|
||||
table has embedding functions else the query type is "fts"
|
||||
|
||||
**Note: this parameter is not yet supported on LanceDB Cloud**
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceQueryBuilder
|
||||
|
||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
||||
version = "0.5.1"
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.9.7",
|
||||
"pylance==0.9.9",
|
||||
"ratelimiter~=1.0",
|
||||
"retry>=0.9.2",
|
||||
"tqdm>=4.27.0",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "vectordb-node"
|
||||
version = "0.4.3"
|
||||
version = "0.4.4"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license = "Apache-2.0"
|
||||
edition = "2018"
|
||||
|
||||
@@ -29,10 +29,14 @@ pub(crate) fn table_create_scalar_index(mut cx: FunctionContext) -> JsResult<JsP
|
||||
|
||||
let (deferred, promise) = cx.promise();
|
||||
let channel = cx.channel();
|
||||
let mut table = js_table.table.clone();
|
||||
let table = js_table.table.clone();
|
||||
|
||||
rt.spawn(async move {
|
||||
let idx_result = table.create_scalar_index(&column, replace).await;
|
||||
let idx_result = table
|
||||
.as_native()
|
||||
.unwrap()
|
||||
.create_scalar_index(&column, replace)
|
||||
.await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
idx_result.or_throw(&mut cx)?;
|
||||
|
||||
@@ -12,13 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use lance_index::vector::{ivf::IvfBuildParams, pq::PQBuildParams};
|
||||
use lance_linalg::distance::MetricType;
|
||||
use neon::context::FunctionContext;
|
||||
use neon::prelude::*;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use vectordb::index::vector::{IvfPQIndexBuilder, VectorIndexBuilder};
|
||||
use vectordb::index::IndexBuilder;
|
||||
|
||||
use crate::error::Error::InvalidIndexType;
|
||||
use crate::error::ResultExt;
|
||||
@@ -29,17 +27,24 @@ use crate::table::JsTable;
|
||||
pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||
let index_params = cx.argument::<JsObject>(0)?;
|
||||
let index_params_builder = get_index_params_builder(&mut cx, index_params).or_throw(&mut cx)?;
|
||||
|
||||
let rt = runtime(&mut cx)?;
|
||||
|
||||
let (deferred, promise) = cx.promise();
|
||||
let channel = cx.channel();
|
||||
let mut table = js_table.table.clone();
|
||||
let table = js_table.table.clone();
|
||||
|
||||
let column_name = index_params
|
||||
.get_opt::<JsString, _, _>(&mut cx, "column")?
|
||||
.map(|s| s.value(&mut cx))
|
||||
.unwrap_or("vector".to_string()); // Backward compatibility
|
||||
|
||||
let tbl = table.clone();
|
||||
let mut index_builder = tbl.create_index(&[&column_name]);
|
||||
get_index_params_builder(&mut cx, index_params, &mut index_builder).or_throw(&mut cx)?;
|
||||
|
||||
rt.spawn(async move {
|
||||
let idx_result = table.create_index(&index_params_builder).await;
|
||||
|
||||
let idx_result = index_builder.build().await;
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
idx_result.or_throw(&mut cx)?;
|
||||
Ok(cx.boxed(JsTable::from(table)))
|
||||
@@ -51,66 +56,39 @@ pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsP
|
||||
fn get_index_params_builder(
|
||||
cx: &mut FunctionContext,
|
||||
obj: Handle<JsObject>,
|
||||
) -> crate::error::Result<impl VectorIndexBuilder> {
|
||||
let idx_type = obj.get::<JsString, _, _>(cx, "type")?.value(cx);
|
||||
|
||||
match idx_type.as_str() {
|
||||
"ivf_pq" => {
|
||||
let mut index_builder: IvfPQIndexBuilder = IvfPQIndexBuilder::new();
|
||||
let mut pq_params = PQBuildParams::default();
|
||||
|
||||
obj.get_opt::<JsString, _, _>(cx, "column")?
|
||||
.map(|s| index_builder.column(s.value(cx)));
|
||||
builder: &mut IndexBuilder,
|
||||
) -> crate::error::Result<()> {
|
||||
match obj.get::<JsString, _, _>(cx, "type")?.value(cx).as_str() {
|
||||
"ivf_pq" => builder.ivf_pq(),
|
||||
_ => {
|
||||
return Err(InvalidIndexType {
|
||||
index_type: "".into(),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
obj.get_opt::<JsString, _, _>(cx, "index_name")?
|
||||
.map(|s| index_builder.index_name(s.value(cx)));
|
||||
.map(|s| builder.name(s.value(cx).as_str()));
|
||||
|
||||
if let Some(metric_type) = obj.get_opt::<JsString, _, _>(cx, "metric_type")? {
|
||||
let metric_type = MetricType::try_from(metric_type.value(cx).as_str()).unwrap();
|
||||
index_builder.metric_type(metric_type);
|
||||
let metric_type = MetricType::try_from(metric_type.value(cx).as_str())?;
|
||||
builder.metric_type(metric_type);
|
||||
}
|
||||
|
||||
let num_partitions = obj.get_opt_usize(cx, "num_partitions")?;
|
||||
let max_iters = obj.get_opt_usize(cx, "max_iters")?;
|
||||
|
||||
num_partitions.map(|np| {
|
||||
let max_iters = max_iters.unwrap_or(50);
|
||||
let ivf_params = IvfBuildParams {
|
||||
num_partitions: np,
|
||||
max_iters,
|
||||
..Default::default()
|
||||
};
|
||||
index_builder.ivf_params(ivf_params)
|
||||
});
|
||||
|
||||
if let Some(use_opq) = obj.get_opt::<JsBoolean, _, _>(cx, "use_opq")? {
|
||||
pq_params.use_opq = use_opq.value(cx);
|
||||
if let Some(np) = obj.get_opt_u32(cx, "num_partitions")? {
|
||||
builder.num_partitions(np);
|
||||
}
|
||||
|
||||
if let Some(num_sub_vectors) = obj.get_opt_usize(cx, "num_sub_vectors")? {
|
||||
pq_params.num_sub_vectors = num_sub_vectors;
|
||||
if let Some(ns) = obj.get_opt_u32(cx, "num_sub_vectors")? {
|
||||
builder.num_sub_vectors(ns);
|
||||
}
|
||||
|
||||
if let Some(num_bits) = obj.get_opt_usize(cx, "num_bits")? {
|
||||
pq_params.num_bits = num_bits;
|
||||
if let Some(max_iters) = obj.get_opt_u32(cx, "max_iters")? {
|
||||
builder.max_iterations(max_iters);
|
||||
}
|
||||
|
||||
if let Some(max_iters) = obj.get_opt_usize(cx, "max_iters")? {
|
||||
pq_params.max_iters = max_iters;
|
||||
if let Some(num_bits) = obj.get_opt_u32(cx, "num_bits")? {
|
||||
builder.num_bits(num_bits);
|
||||
}
|
||||
|
||||
if let Some(max_opq_iters) = obj.get_opt_usize(cx, "max_opq_iters")? {
|
||||
pq_params.max_opq_iters = max_opq_iters;
|
||||
}
|
||||
|
||||
if let Some(replace) = obj.get_opt::<JsBoolean, _, _>(cx, "replace")? {
|
||||
index_builder.replace(replace.value(cx));
|
||||
}
|
||||
|
||||
Ok(index_builder)
|
||||
}
|
||||
index_type => Err(InvalidIndexType {
|
||||
index_type: index_type.into(),
|
||||
}),
|
||||
builder.replace(replace.value(cx));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use lance::io::object_store::ObjectStoreParams;
|
||||
use lance::io::ObjectStoreParams;
|
||||
use neon::prelude::*;
|
||||
use object_store::aws::{AwsCredential, AwsCredentialProvider};
|
||||
use object_store::CredentialProvider;
|
||||
|
||||
@@ -40,17 +40,6 @@ impl JsQuery {
|
||||
}
|
||||
projection_vec
|
||||
});
|
||||
let filter = query_obj
|
||||
.get_opt::<JsString, _, _>(&mut cx, "_filter")?
|
||||
.map(|s| s.value(&mut cx));
|
||||
let refine_factor = query_obj
|
||||
.get_opt_u32(&mut cx, "_refineFactor")
|
||||
.or_throw(&mut cx)?;
|
||||
let nprobes = query_obj.get_usize(&mut cx, "_nprobes").or_throw(&mut cx)?;
|
||||
let metric_type = query_obj
|
||||
.get_opt::<JsString, _, _>(&mut cx, "_metricType")?
|
||||
.map(|s| s.value(&mut cx))
|
||||
.map(|s| MetricType::try_from(s.as_str()).unwrap());
|
||||
|
||||
let prefilter = query_obj
|
||||
.get::<JsBoolean, _, _>(&mut cx, "_prefilter")?
|
||||
@@ -65,24 +54,41 @@ impl JsQuery {
|
||||
|
||||
let (deferred, promise) = cx.promise();
|
||||
let channel = cx.channel();
|
||||
let query_vector = query_obj.get_opt::<JsArray, _, _>(&mut cx, "_queryVector")?;
|
||||
let table = js_table.table.clone();
|
||||
let query = query_vector.map(|q| convert::js_array_to_vec(q.deref(), &mut cx));
|
||||
|
||||
rt.spawn(async move {
|
||||
let mut builder = table
|
||||
.search(query)
|
||||
.refine_factor(refine_factor)
|
||||
.nprobes(nprobes)
|
||||
.filter(filter)
|
||||
.metric_type(metric_type)
|
||||
.select(select)
|
||||
.prefilter(prefilter);
|
||||
let query_vector = query_obj.get_opt::<JsArray, _, _>(&mut cx, "_queryVector")?;
|
||||
let mut builder = table.query();
|
||||
if let Some(query) = query_vector.map(|q| convert::js_array_to_vec(q.deref(), &mut cx)) {
|
||||
builder = builder.nearest_to(&query);
|
||||
if let Some(metric_type) = query_obj
|
||||
.get_opt::<JsString, _, _>(&mut cx, "_metricType")?
|
||||
.map(|s| s.value(&mut cx))
|
||||
.map(|s| MetricType::try_from(s.as_str()).unwrap())
|
||||
{
|
||||
builder = builder.metric_type(metric_type);
|
||||
}
|
||||
|
||||
let nprobes = query_obj.get_usize(&mut cx, "_nprobes").or_throw(&mut cx)?;
|
||||
builder = builder.nprobes(nprobes);
|
||||
};
|
||||
|
||||
if let Some(filter) = query_obj
|
||||
.get_opt::<JsString, _, _>(&mut cx, "_filter")?
|
||||
.map(|s| s.value(&mut cx))
|
||||
{
|
||||
builder = builder.filter(filter);
|
||||
}
|
||||
if let Some(select) = select {
|
||||
builder = builder.select(select.as_slice());
|
||||
}
|
||||
if let Some(limit) = limit {
|
||||
builder = builder.limit(limit as usize);
|
||||
};
|
||||
|
||||
let record_batch_stream = builder.execute();
|
||||
builder = builder.prefilter(prefilter);
|
||||
|
||||
rt.spawn(async move {
|
||||
let record_batch_stream = builder.execute_stream();
|
||||
let results = record_batch_stream
|
||||
.and_then(|stream| {
|
||||
stream
|
||||
|
||||
@@ -15,24 +15,24 @@
|
||||
use arrow_array::{RecordBatch, RecordBatchIterator};
|
||||
use lance::dataset::optimize::CompactionOptions;
|
||||
use lance::dataset::{WriteMode, WriteParams};
|
||||
use lance::io::object_store::ObjectStoreParams;
|
||||
use lance::io::ObjectStoreParams;
|
||||
|
||||
use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer};
|
||||
use neon::prelude::*;
|
||||
use neon::types::buffer::TypedArray;
|
||||
use vectordb::Table;
|
||||
use vectordb::TableRef;
|
||||
|
||||
use crate::error::ResultExt;
|
||||
use crate::{convert, get_aws_creds, get_aws_region, runtime, JsDatabase};
|
||||
|
||||
pub(crate) struct JsTable {
|
||||
pub table: Table,
|
||||
pub table: TableRef,
|
||||
}
|
||||
|
||||
impl Finalize for JsTable {}
|
||||
|
||||
impl From<Table> for JsTable {
|
||||
fn from(table: Table) -> Self {
|
||||
impl From<TableRef> for JsTable {
|
||||
fn from(table: TableRef) -> Self {
|
||||
JsTable { table }
|
||||
}
|
||||
}
|
||||
@@ -96,7 +96,7 @@ impl JsTable {
|
||||
arrow_buffer_to_record_batch(buffer.as_slice(&cx)).or_throw(&mut cx)?;
|
||||
let rt = runtime(&mut cx)?;
|
||||
let channel = cx.channel();
|
||||
let mut table = js_table.table.clone();
|
||||
let table = js_table.table.clone();
|
||||
|
||||
let (deferred, promise) = cx.promise();
|
||||
let write_mode = match write_mode.as_str() {
|
||||
@@ -118,7 +118,7 @@ impl JsTable {
|
||||
|
||||
rt.spawn(async move {
|
||||
let batch_reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema);
|
||||
let add_result = table.add(batch_reader, Some(params)).await;
|
||||
let add_result = table.add(Box::new(batch_reader), Some(params)).await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
add_result.or_throw(&mut cx)?;
|
||||
@@ -152,7 +152,7 @@ impl JsTable {
|
||||
let (deferred, promise) = cx.promise();
|
||||
let predicate = cx.argument::<JsString>(0)?.value(&mut cx);
|
||||
let channel = cx.channel();
|
||||
let mut table = js_table.table.clone();
|
||||
let table = js_table.table.clone();
|
||||
|
||||
rt.spawn(async move {
|
||||
let delete_result = table.delete(&predicate).await;
|
||||
@@ -167,7 +167,7 @@ impl JsTable {
|
||||
|
||||
pub(crate) fn js_update(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||
let mut table = js_table.table.clone();
|
||||
let table = js_table.table.clone();
|
||||
|
||||
let rt = runtime(&mut cx)?;
|
||||
let (deferred, promise) = cx.promise();
|
||||
@@ -218,7 +218,11 @@ impl JsTable {
|
||||
|
||||
let predicate = predicate.as_deref();
|
||||
|
||||
let update_result = table.update(predicate, updates_arg).await;
|
||||
let update_result = table
|
||||
.as_native()
|
||||
.unwrap()
|
||||
.update(predicate, updates_arg)
|
||||
.await;
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
update_result.or_throw(&mut cx)?;
|
||||
Ok(cx.boxed(JsTable::from(table)))
|
||||
@@ -249,6 +253,8 @@ impl JsTable {
|
||||
|
||||
rt.spawn(async move {
|
||||
let stats = table
|
||||
.as_native()
|
||||
.unwrap()
|
||||
.cleanup_old_versions(older_than, Some(delete_unverified))
|
||||
.await;
|
||||
|
||||
@@ -278,7 +284,7 @@ impl JsTable {
|
||||
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||
let rt = runtime(&mut cx)?;
|
||||
let (deferred, promise) = cx.promise();
|
||||
let mut table = js_table.table.clone();
|
||||
let table = js_table.table.clone();
|
||||
let channel = cx.channel();
|
||||
|
||||
let js_options = cx.argument::<JsObject>(0)?;
|
||||
@@ -310,7 +316,11 @@ impl JsTable {
|
||||
}
|
||||
|
||||
rt.spawn(async move {
|
||||
let stats = table.compact_files(options, None).await;
|
||||
let stats = table
|
||||
.as_native()
|
||||
.unwrap()
|
||||
.compact_files(options, None)
|
||||
.await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let stats = stats.or_throw(&mut cx)?;
|
||||
@@ -349,7 +359,7 @@ impl JsTable {
|
||||
let table = js_table.table.clone();
|
||||
|
||||
rt.spawn(async move {
|
||||
let indices = table.load_indices().await;
|
||||
let indices = table.as_native().unwrap().load_indices().await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let indices = indices.or_throw(&mut cx)?;
|
||||
@@ -389,8 +399,8 @@ impl JsTable {
|
||||
|
||||
rt.spawn(async move {
|
||||
let load_stats = futures::try_join!(
|
||||
table.count_indexed_rows(&index_uuid),
|
||||
table.count_unindexed_rows(&index_uuid)
|
||||
table.as_native().unwrap().count_indexed_rows(&index_uuid),
|
||||
table.as_native().unwrap().count_unindexed_rows(&index_uuid)
|
||||
);
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "vectordb"
|
||||
version = "0.4.3"
|
||||
version = "0.4.4"
|
||||
edition = "2021"
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license = "Apache-2.0"
|
||||
@@ -16,6 +16,7 @@ arrow-data = { workspace = true }
|
||||
arrow-schema = { workspace = true }
|
||||
arrow-ord = { workspace = true }
|
||||
arrow-cast = { workspace = true }
|
||||
arrow-ipc.workspace = true
|
||||
chrono = { workspace = true }
|
||||
object_store = { workspace = true }
|
||||
snafu = { workspace = true }
|
||||
@@ -25,11 +26,11 @@ lance-index = { workspace = true }
|
||||
lance-linalg = { workspace = true }
|
||||
lance-testing = { workspace = true }
|
||||
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
||||
log = { workspace = true }
|
||||
log.workspace = true
|
||||
async-trait = "0"
|
||||
bytes = "1"
|
||||
futures = "0"
|
||||
num-traits = "0"
|
||||
futures.workspace = true
|
||||
num-traits.workspace = true
|
||||
url = { workspace = true }
|
||||
serde = { version = "^1" }
|
||||
serde_json = { version = "1" }
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
# LanceDB Rust
|
||||
|
||||
Rust client for LanceDB, a serverless vector database. Read more at: https://lancedb.com/
|
||||
<a href="https://crates.io/crates/vectordb"></a>
|
||||
<a href="https://docs.rs/vectordb/latest/vectordb/"></a>
|
||||
|
||||
LanceDB Rust SDK, a serverless vector database.
|
||||
|
||||
Read more at: https://lancedb.com/
|
||||
|
||||
@@ -21,13 +21,13 @@ use std::sync::Arc;
|
||||
|
||||
use arrow_array::RecordBatchReader;
|
||||
use lance::dataset::WriteParams;
|
||||
use lance::io::object_store::{ObjectStore, WrappingObjectStore};
|
||||
use lance::io::{ObjectStore, WrappingObjectStore};
|
||||
use object_store::local::LocalFileSystem;
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::error::{CreateDirSnafu, Error, InvalidTableNameSnafu, Result};
|
||||
use crate::io::object_store::MirroringObjectStoreWrapper;
|
||||
use crate::table::{ReadParams, Table};
|
||||
use crate::table::{NativeTable, ReadParams, TableRef};
|
||||
|
||||
pub const LANCE_FILE_EXTENSION: &str = "lance";
|
||||
|
||||
@@ -46,17 +46,20 @@ pub trait Connection: Send + Sync {
|
||||
/// * `params` - Optional [`WriteParams`] to create the table.
|
||||
///
|
||||
/// # Returns
|
||||
/// Created [`Table`], or [`Err(Error::TableAlreadyExists)`] if the table already exists.
|
||||
/// Created [`TableRef`], or [`Err(Error::TableAlreadyExists)`] if the table already exists.
|
||||
async fn create_table(
|
||||
&self,
|
||||
name: &str,
|
||||
batches: Box<dyn RecordBatchReader + Send>,
|
||||
params: Option<WriteParams>,
|
||||
) -> Result<Table>;
|
||||
) -> Result<TableRef>;
|
||||
|
||||
async fn open_table(&self, name: &str) -> Result<Table>;
|
||||
async fn open_table(&self, name: &str) -> Result<TableRef> {
|
||||
self.open_table_with_params(name, ReadParams::default())
|
||||
.await
|
||||
}
|
||||
|
||||
async fn open_table_with_params(&self, name: &str, params: ReadParams) -> Result<Table>;
|
||||
async fn open_table_with_params(&self, name: &str, params: ReadParams) -> Result<TableRef>;
|
||||
|
||||
/// Drop a table in the database.
|
||||
///
|
||||
@@ -240,30 +243,19 @@ impl Connection for Database {
|
||||
name: &str,
|
||||
batches: Box<dyn RecordBatchReader + Send>,
|
||||
params: Option<WriteParams>,
|
||||
) -> Result<Table> {
|
||||
) -> Result<TableRef> {
|
||||
let table_uri = self.table_uri(name)?;
|
||||
|
||||
Table::create(
|
||||
Ok(Arc::new(
|
||||
NativeTable::create(
|
||||
&table_uri,
|
||||
name,
|
||||
batches,
|
||||
self.store_wrapper.clone(),
|
||||
params,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Open a table in the database.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `name` - The name of the table.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * A [Table] object.
|
||||
async fn open_table(&self, name: &str) -> Result<Table> {
|
||||
self.open_table_with_params(name, ReadParams::default())
|
||||
.await
|
||||
.await?,
|
||||
))
|
||||
}
|
||||
|
||||
/// Open a table in the database.
|
||||
@@ -274,10 +266,13 @@ impl Connection for Database {
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * A [Table] object.
|
||||
async fn open_table_with_params(&self, name: &str, params: ReadParams) -> Result<Table> {
|
||||
/// * A [TableRef] object.
|
||||
async fn open_table_with_params(&self, name: &str, params: ReadParams) -> Result<TableRef> {
|
||||
let table_uri = self.table_uri(name)?;
|
||||
Table::open_with_params(&table_uri, name, self.store_wrapper.clone(), params).await
|
||||
Ok(Arc::new(
|
||||
NativeTable::open_with_params(&table_uri, name, self.store_wrapper.clone(), params)
|
||||
.await?,
|
||||
))
|
||||
}
|
||||
|
||||
async fn drop_table(&self, name: &str) -> Result<()> {
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::PoisonError;
|
||||
|
||||
use arrow_schema::ArrowError;
|
||||
use snafu::Snafu;
|
||||
|
||||
@@ -35,6 +37,8 @@ pub enum Error {
|
||||
Lance { message: String },
|
||||
#[snafu(display("LanceDB Schema Error: {message}"))]
|
||||
Schema { message: String },
|
||||
#[snafu(display("Runtime error: {message}"))]
|
||||
Runtime { message: String },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -70,3 +74,11 @@ impl From<object_store::path::Error> for Error {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<PoisonError<T>> for Error {
|
||||
fn from(e: PoisonError<T>) -> Self {
|
||||
Self::Runtime {
|
||||
message: e.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Lance Developers.
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@@ -12,4 +12,281 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::{cmp::max, sync::Arc};
|
||||
|
||||
use lance_index::{DatasetIndexExt, IndexType};
|
||||
pub use lance_linalg::distance::MetricType;
|
||||
|
||||
pub mod vector;
|
||||
|
||||
use crate::{utils::default_vector_column, Error, Result, Table};
|
||||
|
||||
/// Index Parameters.
///
/// One variant per index kind, holding the settings used for a single index
/// build.
pub enum IndexParams {
    /// Parameters for a scalar index.
    Scalar {
        /// Whether to replace the existing index.
        replace: bool,
    },
    /// Parameters for an IVF_PQ vector index.
    IvfPq {
        /// Whether to replace the existing index.
        replace: bool,
        /// Metric type used to build the vector index.
        metric_type: MetricType,
        /// Number of IVF partitions.
        num_partitions: u64,
        /// Number of sub-vectors of PQ.
        num_sub_vectors: u32,
        /// Number of bits used for PQ centroids.
        num_bits: u32,
        /// The rate to find samples to train kmeans.
        sample_rate: u32,
        /// Max iterations to train kmeans.
        max_iterations: u32,
    },
}
|
||||
|
||||
/// Builder for Index Parameters.
///
/// Collects the target table, column(s) and index settings; the index itself
/// is created when `build` is called.
pub struct IndexBuilder {
    /// Table the index is built on.
    table: Arc<dyn Table>,
    /// Columns to index. When empty, `build` falls back to the default vector column.
    columns: Vec<String>,
    // General parameters
    /// Index name.
    name: Option<String>,
    /// Replace the existing index.
    replace: bool,

    /// Kind of index to build (scalar or vector).
    index_type: IndexType,

    // Scalar index parameters
    // Nothing to set here.

    // IVF_PQ parameters
    /// Metric type used to build the vector index.
    metric_type: MetricType,
    /// Number of IVF partitions; when `None`, `build` derives it from the row count.
    num_partitions: Option<u32>,
    // PQ related
    /// Number of PQ sub-vectors; when `None`, `build` derives it from the vector dimension.
    num_sub_vectors: Option<u32>,
    /// Number of bits used for PQ centroids.
    num_bits: u32,

    /// The rate to find samples to train kmeans.
    sample_rate: u32,
    /// Max iteration to train kmeans.
    max_iterations: u32,
}
|
||||
|
||||
impl IndexBuilder {
|
||||
pub(crate) fn new(table: Arc<dyn Table>, columns: &[&str]) -> Self {
|
||||
IndexBuilder {
|
||||
table,
|
||||
columns: columns.iter().map(|c| c.to_string()).collect(),
|
||||
name: None,
|
||||
replace: true,
|
||||
index_type: IndexType::Scalar,
|
||||
metric_type: MetricType::L2,
|
||||
num_partitions: None,
|
||||
num_sub_vectors: None,
|
||||
num_bits: 8,
|
||||
sample_rate: 256,
|
||||
max_iterations: 50,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a Scalar Index.
|
||||
///
|
||||
/// Accepted parameters:
|
||||
/// - `replace`: Replace the existing index.
|
||||
/// - `name`: Index name. Default: `None`
|
||||
pub fn scalar(&mut self) -> &mut Self {
|
||||
self.index_type = IndexType::Scalar;
|
||||
self
|
||||
}
|
||||
|
||||
/// Build an IVF PQ index.
|
||||
///
|
||||
/// Accepted parameters:
|
||||
/// - `replace`: Replace the existing index.
|
||||
/// - `name`: Index name. Default: `None`
|
||||
/// - `metric_type`: [MetricType] to use to build Vector Index.
|
||||
/// - `num_partitions`: Number of IVF partitions.
|
||||
/// - `num_sub_vectors`: Number of sub-vectors of PQ.
|
||||
/// - `num_bits`: Number of bits used for PQ centroids.
|
||||
/// - `sample_rate`: The rate to find samples to train kmeans.
|
||||
/// - `max_iterations`: Max iteration to train kmeans.
|
||||
pub fn ivf_pq(&mut self) -> &mut Self {
|
||||
self.index_type = IndexType::Vector;
|
||||
self
|
||||
}
|
||||
|
||||
/// The columns to build index on.
|
||||
pub fn columns(&mut self, cols: &[&str]) -> &mut Self {
|
||||
self.columns = cols.iter().map(|s| s.to_string()).collect();
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to replace the existing index, default is `true`.
|
||||
pub fn replace(&mut self, v: bool) -> &mut Self {
|
||||
self.replace = v;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index name.
|
||||
pub fn name(&mut self, name: &str) -> &mut Self {
|
||||
self.name = Some(name.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// [MetricType] to use to build Vector Index.
|
||||
///
|
||||
/// Default value is [MetricType::L2].
|
||||
pub fn metric_type(&mut self, metric_type: MetricType) -> &mut Self {
|
||||
self.metric_type = metric_type;
|
||||
self
|
||||
}
|
||||
|
||||
/// Number of IVF partitions.
|
||||
pub fn num_partitions(&mut self, num_partitions: u32) -> &mut Self {
|
||||
self.num_partitions = Some(num_partitions);
|
||||
self
|
||||
}
|
||||
|
||||
/// Number of sub-vectors of PQ.
|
||||
pub fn num_sub_vectors(&mut self, num_sub_vectors: u32) -> &mut Self {
|
||||
self.num_sub_vectors = Some(num_sub_vectors);
|
||||
self
|
||||
}
|
||||
|
||||
/// Number of bits used for PQ centroids.
|
||||
pub fn num_bits(&mut self, num_bits: u32) -> &mut Self {
|
||||
self.num_bits = num_bits;
|
||||
self
|
||||
}
|
||||
|
||||
/// The rate to find samples to train kmeans.
|
||||
pub fn sample_rate(&mut self, sample_rate: u32) -> &mut Self {
|
||||
self.sample_rate = sample_rate;
|
||||
self
|
||||
}
|
||||
|
||||
/// Max iteration to train kmeans.
|
||||
pub fn max_iterations(&mut self, max_iterations: u32) -> &mut Self {
|
||||
self.max_iterations = max_iterations;
|
||||
self
|
||||
}
|
||||
|
||||
/// Build the parameters.
|
||||
pub async fn build(&self) -> Result<()> {
|
||||
let schema = self.table.schema();
|
||||
|
||||
// TODO: simplify this after GH lance#1864.
|
||||
let mut index_type = &self.index_type;
|
||||
let columns = if self.columns.is_empty() {
|
||||
// By default we create vector index.
|
||||
index_type = &IndexType::Vector;
|
||||
vec![default_vector_column(&schema, None)?]
|
||||
} else {
|
||||
self.columns.clone()
|
||||
};
|
||||
|
||||
if columns.len() != 1 {
|
||||
return Err(Error::Schema {
|
||||
message: "Only one column is supported for index".to_string(),
|
||||
});
|
||||
}
|
||||
let column = &columns[0];
|
||||
|
||||
let field = schema.field_with_name(column)?;
|
||||
|
||||
let params = match index_type {
|
||||
IndexType::Scalar => IndexParams::Scalar {
|
||||
replace: self.replace,
|
||||
},
|
||||
IndexType::Vector => {
|
||||
let num_partitions = if let Some(n) = self.num_partitions {
|
||||
n
|
||||
} else {
|
||||
suggested_num_partitions(self.table.count_rows().await?)
|
||||
};
|
||||
let num_sub_vectors: u32 = if let Some(n) = self.num_sub_vectors {
|
||||
n
|
||||
} else {
|
||||
match field.data_type() {
|
||||
arrow_schema::DataType::FixedSizeList(_, n) => {
|
||||
Ok::<u32, Error>(suggested_num_sub_vectors(*n as u32))
|
||||
}
|
||||
_ => Err(Error::Schema {
|
||||
message: format!(
|
||||
"Column '{}' is not a FixedSizeList",
|
||||
&self.columns[0]
|
||||
),
|
||||
}),
|
||||
}?
|
||||
};
|
||||
IndexParams::IvfPq {
|
||||
replace: self.replace,
|
||||
metric_type: self.metric_type,
|
||||
num_partitions: num_partitions as u64,
|
||||
num_sub_vectors,
|
||||
num_bits: self.num_bits,
|
||||
sample_rate: self.sample_rate,
|
||||
max_iterations: self.max_iterations,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let tbl = self
|
||||
.table
|
||||
.as_native()
|
||||
.expect("Only native table is supported here");
|
||||
let mut dataset = tbl.clone_inner_dataset();
|
||||
match params {
|
||||
IndexParams::Scalar { replace } => {
|
||||
self.table
|
||||
.as_native()
|
||||
.unwrap()
|
||||
.create_scalar_index(column, replace)
|
||||
.await?
|
||||
}
|
||||
IndexParams::IvfPq {
|
||||
replace,
|
||||
metric_type,
|
||||
num_partitions,
|
||||
num_sub_vectors,
|
||||
num_bits,
|
||||
max_iterations,
|
||||
..
|
||||
} => {
|
||||
let lance_idx_params = lance::index::vector::VectorIndexParams::ivf_pq(
|
||||
num_partitions as usize,
|
||||
num_bits as u8,
|
||||
num_sub_vectors as usize,
|
||||
false,
|
||||
metric_type,
|
||||
max_iterations as usize,
|
||||
);
|
||||
dataset
|
||||
.create_index(
|
||||
&[column],
|
||||
IndexType::Vector,
|
||||
None,
|
||||
&lance_idx_params,
|
||||
replace,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
tbl.reset_dataset(dataset);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Suggest a partition count for a table holding `rows` rows.
///
/// The result is the square root of the row count, truncated to an integer and
/// never smaller than 1.
fn suggested_num_partitions(rows: usize) -> u32 {
    let root = (rows as f64).sqrt() as u32;
    if root < 1 {
        1
    } else {
        root
    }
}
|
||||
|
||||
fn suggested_num_sub_vectors(dim: u32) -> u32 {
|
||||
if dim % 16 == 0 {
|
||||
// Should be more aggressive than this default.
|
||||
dim / 16
|
||||
} else if dim % 8 == 0 {
|
||||
dim / 8
|
||||
} else {
|
||||
log::warn!(
|
||||
"The dimension of the vector is not divisible by 8 or 16, \
|
||||
which may cause performance degradation in PQ"
|
||||
);
|
||||
1
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,104 +14,7 @@
|
||||
|
||||
use serde::Deserialize;
|
||||
|
||||
use lance::format::{Index, Manifest};
|
||||
use lance::index::vector::pq::PQBuildParams;
|
||||
use lance::index::vector::VectorIndexParams;
|
||||
use lance_index::vector::ivf::IvfBuildParams;
|
||||
use lance_linalg::distance::MetricType;
|
||||
|
||||
pub trait VectorIndexBuilder {
|
||||
fn get_column(&self) -> Option<String>;
|
||||
fn get_index_name(&self) -> Option<String>;
|
||||
fn build(&self) -> VectorIndexParams;
|
||||
|
||||
fn get_replace(&self) -> bool;
|
||||
}
|
||||
|
||||
pub struct IvfPQIndexBuilder {
|
||||
column: Option<String>,
|
||||
index_name: Option<String>,
|
||||
metric_type: Option<MetricType>,
|
||||
ivf_params: Option<IvfBuildParams>,
|
||||
pq_params: Option<PQBuildParams>,
|
||||
replace: bool,
|
||||
}
|
||||
|
||||
impl IvfPQIndexBuilder {
|
||||
pub fn new() -> IvfPQIndexBuilder {
|
||||
Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IvfPQIndexBuilder {
|
||||
fn default() -> Self {
|
||||
IvfPQIndexBuilder {
|
||||
column: None,
|
||||
index_name: None,
|
||||
metric_type: None,
|
||||
ivf_params: None,
|
||||
pq_params: None,
|
||||
replace: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IvfPQIndexBuilder {
|
||||
pub fn column(&mut self, column: String) -> &mut IvfPQIndexBuilder {
|
||||
self.column = Some(column);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn index_name(&mut self, index_name: String) -> &mut IvfPQIndexBuilder {
|
||||
self.index_name = Some(index_name);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn metric_type(&mut self, metric_type: MetricType) -> &mut IvfPQIndexBuilder {
|
||||
self.metric_type = Some(metric_type);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn ivf_params(&mut self, ivf_params: IvfBuildParams) -> &mut IvfPQIndexBuilder {
|
||||
self.ivf_params = Some(ivf_params);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn pq_params(&mut self, pq_params: PQBuildParams) -> &mut IvfPQIndexBuilder {
|
||||
self.pq_params = Some(pq_params);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn replace(&mut self, replace: bool) -> &mut IvfPQIndexBuilder {
|
||||
self.replace = replace;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl VectorIndexBuilder for IvfPQIndexBuilder {
|
||||
fn get_column(&self) -> Option<String> {
|
||||
self.column.clone()
|
||||
}
|
||||
|
||||
fn get_index_name(&self) -> Option<String> {
|
||||
self.index_name.clone()
|
||||
}
|
||||
|
||||
fn build(&self) -> VectorIndexParams {
|
||||
let ivf_params = self.ivf_params.clone().unwrap_or_default();
|
||||
let pq_params = self.pq_params.clone().unwrap_or_default();
|
||||
|
||||
VectorIndexParams::with_ivf_pq_params(
|
||||
self.metric_type.unwrap_or(MetricType::L2),
|
||||
ivf_params,
|
||||
pq_params,
|
||||
)
|
||||
}
|
||||
|
||||
fn get_replace(&self) -> bool {
|
||||
self.replace
|
||||
}
|
||||
}
|
||||
use lance::table::format::{Index, Manifest};
|
||||
|
||||
pub struct VectorIndex {
|
||||
pub columns: Vec<String>,
|
||||
@@ -139,79 +42,3 @@ pub struct VectorIndexStatistics {
|
||||
pub num_indexed_rows: usize,
|
||||
pub num_unindexed_rows: usize,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use lance::index::vector::StageParams;
|
||||
use lance_index::vector::ivf::IvfBuildParams;
|
||||
use lance_index::vector::pq::PQBuildParams;
|
||||
|
||||
use crate::index::vector::{IvfPQIndexBuilder, VectorIndexBuilder};
|
||||
|
||||
#[test]
|
||||
fn test_builder_no_params() {
|
||||
let index_builder = IvfPQIndexBuilder::new();
|
||||
assert!(index_builder.get_column().is_none());
|
||||
assert!(index_builder.get_index_name().is_none());
|
||||
|
||||
let index_params = index_builder.build();
|
||||
assert_eq!(index_params.stages.len(), 2);
|
||||
if let StageParams::Ivf(ivf_params) = index_params.stages.get(0).unwrap() {
|
||||
let default = IvfBuildParams::default();
|
||||
assert_eq!(ivf_params.num_partitions, default.num_partitions);
|
||||
assert_eq!(ivf_params.max_iters, default.max_iters);
|
||||
} else {
|
||||
panic!("Expected first stage to be ivf")
|
||||
}
|
||||
|
||||
if let StageParams::PQ(pq_params) = index_params.stages.get(1).unwrap() {
|
||||
assert_eq!(pq_params.use_opq, false);
|
||||
} else {
|
||||
panic!("Expected second stage to be pq")
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_builder_all_params() {
|
||||
let mut index_builder = IvfPQIndexBuilder::new();
|
||||
|
||||
index_builder
|
||||
.column("c".to_owned())
|
||||
.metric_type(MetricType::Cosine)
|
||||
.index_name("index".to_owned());
|
||||
|
||||
assert_eq!(index_builder.column.clone().unwrap(), "c");
|
||||
assert_eq!(index_builder.metric_type.unwrap(), MetricType::Cosine);
|
||||
assert_eq!(index_builder.index_name.clone().unwrap(), "index");
|
||||
|
||||
let ivf_params = IvfBuildParams::new(500);
|
||||
let mut pq_params = PQBuildParams::default();
|
||||
pq_params.use_opq = true;
|
||||
pq_params.max_iters = 1;
|
||||
pq_params.num_bits = 8;
|
||||
pq_params.num_sub_vectors = 50;
|
||||
pq_params.max_opq_iters = 2;
|
||||
index_builder.ivf_params(ivf_params);
|
||||
index_builder.pq_params(pq_params);
|
||||
|
||||
let index_params = index_builder.build();
|
||||
assert_eq!(index_params.stages.len(), 2);
|
||||
if let StageParams::Ivf(ivf_params) = index_params.stages.get(0).unwrap() {
|
||||
assert_eq!(ivf_params.num_partitions, 500);
|
||||
} else {
|
||||
assert!(false, "Expected first stage to be ivf")
|
||||
}
|
||||
|
||||
if let StageParams::PQ(pq_params) = index_params.stages.get(1).unwrap() {
|
||||
assert_eq!(pq_params.use_opq, true);
|
||||
assert_eq!(pq_params.max_iters, 1);
|
||||
assert_eq!(pq_params.num_bits, 8);
|
||||
assert_eq!(pq_params.num_sub_vectors, 50);
|
||||
assert_eq!(pq_params.max_opq_iters, 2);
|
||||
} else {
|
||||
assert!(false, "Expected second stage to be pq")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ use std::{
|
||||
|
||||
use bytes::Bytes;
|
||||
use futures::{stream::BoxStream, FutureExt, StreamExt};
|
||||
use lance::io::object_store::WrappingObjectStore;
|
||||
use lance::io::WrappingObjectStore;
|
||||
use object_store::{
|
||||
path::Path, Error, GetOptions, GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore,
|
||||
PutOptions, PutResult, Result,
|
||||
@@ -335,14 +335,15 @@ impl WrappingObjectStore for MirroringObjectStoreWrapper {
|
||||
#[cfg(all(test, not(windows)))]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::connection::{Connection, Database};
|
||||
use arrow_array::PrimitiveArray;
|
||||
|
||||
use futures::TryStreamExt;
|
||||
use lance::{dataset::WriteParams, io::object_store::ObjectStoreParams};
|
||||
use lance::{dataset::WriteParams, io::ObjectStoreParams};
|
||||
use lance_testing::datagen::{BatchGenerator, IncrementingInt32, RandomVector};
|
||||
use object_store::local::LocalFileSystem;
|
||||
use tempfile;
|
||||
|
||||
use crate::connection::{Connection, Database};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_e2e() {
|
||||
let dir1 = tempfile::tempdir().unwrap().into_path();
|
||||
@@ -374,11 +375,9 @@ mod test {
|
||||
assert_eq!(t.count_rows().await.unwrap(), 100);
|
||||
|
||||
let q = t
|
||||
.search(Some(PrimitiveArray::from_iter_values(vec![
|
||||
0.1, 0.1, 0.1, 0.1,
|
||||
])))
|
||||
.search(&[0.1, 0.1, 0.1, 0.1])
|
||||
.limit(10)
|
||||
.execute()
|
||||
.execute_stream()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
||||
79
rust/vectordb/src/ipc.rs
Normal file
79
rust/vectordb/src/ipc.rs
Normal file
@@ -0,0 +1,79 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! IPC support
|
||||
|
||||
use std::io::Cursor;
|
||||
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_ipc::reader::StreamReader;
|
||||
|
||||
use crate::Result;
|
||||
|
||||
/// Convert a Arrow IPC file to a batch reader
|
||||
pub fn ipc_file_to_batches(buf: Vec<u8>) -> Result<impl RecordBatchReader> {
|
||||
let buf_reader = Cursor::new(buf);
|
||||
let reader = StreamReader::try_new(buf_reader, None)?;
|
||||
Ok(reader)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use arrow_array::{Float32Array, Int64Array, RecordBatch};
|
||||
use arrow_ipc::writer::StreamWriter;
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use std::sync::Arc;
|
||||
|
||||
fn create_record_batch() -> Result<RecordBatch> {
|
||||
let schema = Schema::new(vec![
|
||||
Field::new("a", DataType::Int64, false),
|
||||
Field::new("b", DataType::Float32, false),
|
||||
]);
|
||||
|
||||
let a = Int64Array::from(vec![1, 2, 3, 4, 5]);
|
||||
let b = Float32Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
|
||||
|
||||
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])?;
|
||||
|
||||
Ok(batch)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ipc_file_to_batches() -> Result<()> {
|
||||
let batch = create_record_batch()?;
|
||||
|
||||
let mut writer = StreamWriter::try_new(vec![], &batch.schema())?;
|
||||
writer.write(&batch)?;
|
||||
writer.finish()?;
|
||||
|
||||
let buf = writer.into_inner().unwrap();
|
||||
let mut reader = ipc_file_to_batches(buf).unwrap();
|
||||
let read_batch = reader.next().unwrap()?;
|
||||
|
||||
assert_eq!(batch.num_columns(), read_batch.num_columns());
|
||||
assert_eq!(batch.num_rows(), read_batch.num_rows());
|
||||
|
||||
for i in 0..batch.num_columns() {
|
||||
let batch_column = batch.column(i);
|
||||
let read_batch_column = read_batch.column(i);
|
||||
|
||||
assert_eq!(batch_column.data_type(), read_batch_column.data_type());
|
||||
assert_eq!(batch_column.len(), read_batch_column.len());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -46,8 +46,8 @@
|
||||
//! #### Connect to a database.
|
||||
//!
|
||||
//! ```rust
|
||||
//! use vectordb::{connection::{Database, Connection}, Table, WriteMode};
|
||||
//! use arrow_schema::{Field, Schema};
|
||||
//! use vectordb::connection::Database;
|
||||
//! # use arrow_schema::{Field, Schema};
|
||||
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
//! let db = Database::connect("data/sample-lancedb").await.unwrap();
|
||||
//! # });
|
||||
@@ -55,7 +55,7 @@
|
||||
//!
|
||||
//! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schemas, data types, and the arrays themselves.
|
||||
//! It treats [`FixedSizeList<Float16/Float32>`](https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html)
|
||||
//! columns as vectors.
|
||||
//! columns as vector columns.
|
||||
//!
|
||||
//! #### Create a table
|
||||
//!
|
||||
@@ -90,6 +90,27 @@
|
||||
//! # });
|
||||
//! ```
|
||||
//!
|
||||
//! #### Create vector index (IVF_PQ)
|
||||
//!
|
||||
//! ```no_run
|
||||
//! # use std::sync::Arc;
|
||||
//! # use vectordb::connection::{Database, Connection};
|
||||
//! # use arrow_array::{FixedSizeListArray, types::Float32Type, RecordBatch,
|
||||
//! # RecordBatchIterator, Int32Array};
|
||||
//! # use arrow_schema::{Schema, Field, DataType};
|
||||
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
//! # let tmpdir = tempfile::tempdir().unwrap();
|
||||
//! # let db = Database::connect(tmpdir.path().to_str().unwrap()).await.unwrap();
|
||||
//! # let tbl = db.open_table("idx_test").await.unwrap();
|
||||
//! tbl.create_index(&["vector"])
|
||||
//! .ivf_pq()
|
||||
//! .num_partitions(256)
|
||||
//! .build()
|
||||
//! .await
|
||||
//! .unwrap();
|
||||
//! # });
|
||||
//! ```
|
||||
//!
|
||||
//! #### Open table and run search
|
||||
//!
|
||||
//! ```rust
|
||||
@@ -119,8 +140,8 @@
|
||||
//! # db.create_table("my_table", Box::new(batches), None).await.unwrap();
|
||||
//! let table = db.open_table("my_table").await.unwrap();
|
||||
//! let results = table
|
||||
//! .search(Some(vec![1.0; 128]))
|
||||
//! .execute()
|
||||
//! .search(&[1.0; 128])
|
||||
//! .execute_stream()
|
||||
//! .await
|
||||
//! .unwrap()
|
||||
//! .try_collect::<Vec<_>>()
|
||||
@@ -136,11 +157,13 @@ pub mod data;
|
||||
pub mod error;
|
||||
pub mod index;
|
||||
pub mod io;
|
||||
pub mod ipc;
|
||||
pub mod query;
|
||||
pub mod table;
|
||||
pub mod utils;
|
||||
|
||||
pub use connection::Connection;
|
||||
pub use table::Table;
|
||||
pub use connection::{Connection, Database};
|
||||
pub use error::{Error, Result};
|
||||
pub use table::{Table, TableRef};
|
||||
|
||||
pub use lance::dataset::WriteMode;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Lance Developers.
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@@ -15,25 +15,42 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::Float32Array;
|
||||
use arrow_schema::Schema;
|
||||
use lance::dataset::scanner::{DatasetRecordBatchStream, Scanner};
|
||||
use lance::dataset::Dataset;
|
||||
use lance_linalg::distance::MetricType;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::utils::default_vector_column;
|
||||
|
||||
const DEFAULT_TOP_K: usize = 10;
|
||||
|
||||
/// A builder for nearest neighbor queries for LanceDB.
|
||||
#[derive(Clone)]
|
||||
pub struct Query {
|
||||
pub dataset: Arc<Dataset>,
|
||||
pub query_vector: Option<Float32Array>,
|
||||
pub column: String,
|
||||
pub limit: Option<usize>,
|
||||
pub filter: Option<String>,
|
||||
pub select: Option<Vec<String>>,
|
||||
pub nprobes: usize,
|
||||
pub refine_factor: Option<u32>,
|
||||
pub metric_type: Option<MetricType>,
|
||||
pub use_index: bool,
|
||||
pub prefilter: bool,
|
||||
dataset: Arc<Dataset>,
|
||||
|
||||
// The column to run the query on. If not specified, we will attempt to guess
|
||||
// the column based on the dataset's schema.
|
||||
column: Option<String>,
|
||||
|
||||
// IVF PQ - ANN search.
|
||||
query_vector: Option<Float32Array>,
|
||||
nprobes: usize,
|
||||
refine_factor: Option<u32>,
|
||||
metric_type: Option<MetricType>,
|
||||
|
||||
/// Limit the number of rows to return.
|
||||
limit: Option<usize>,
|
||||
/// Apply filter to the returned rows.
|
||||
filter: Option<String>,
|
||||
/// Select column projection.
|
||||
select: Option<Vec<String>>,
|
||||
|
||||
/// Default is true. Set to false to enforce a brute force search.
|
||||
use_index: bool,
|
||||
/// Apply filter before ANN search.
|
||||
prefilter: bool,
|
||||
}
|
||||
|
||||
impl Query {
|
||||
@@ -41,17 +58,13 @@ impl Query {
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `dataset` - The table / dataset the query will be run against.
|
||||
/// * `vector` The vector used for this query.
|
||||
/// * `dataset` - Lance dataset.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * A [Query] object.
|
||||
pub(crate) fn new(dataset: Arc<Dataset>, vector: Option<Float32Array>) -> Self {
|
||||
pub(crate) fn new(dataset: Arc<Dataset>) -> Self {
|
||||
Query {
|
||||
dataset,
|
||||
query_vector: vector,
|
||||
column: crate::table::VECTOR_COLUMN_NAME.to_string(),
|
||||
query_vector: None,
|
||||
column: None,
|
||||
limit: None,
|
||||
nprobes: 20,
|
||||
refine_factor: None,
|
||||
@@ -63,17 +76,24 @@ impl Query {
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the queries and return its results.
|
||||
/// Convert the query plan to a [`DatasetRecordBatchStream`]
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * A [DatasetRecordBatchStream] with the query's results.
|
||||
pub async fn execute(&self) -> Result<DatasetRecordBatchStream> {
|
||||
pub async fn execute_stream(&self) -> Result<DatasetRecordBatchStream> {
|
||||
let mut scanner: Scanner = self.dataset.scan();
|
||||
|
||||
if let Some(query) = self.query_vector.as_ref() {
|
||||
// If there is a vector query, default to limit=10 if unspecified
|
||||
scanner.nearest(&self.column, query, self.limit.unwrap_or(10))?;
|
||||
let column = if let Some(col) = self.column.as_ref() {
|
||||
col.clone()
|
||||
} else {
|
||||
// Infer a vector column with the same dimension of the query vector.
|
||||
let arrow_schema = Schema::from(self.dataset.schema());
|
||||
default_vector_column(&arrow_schema, Some(query.len() as i32))?
|
||||
};
|
||||
scanner.nearest(&column, query, self.limit.unwrap_or(DEFAULT_TOP_K))?;
|
||||
} else {
|
||||
// If there is no vector query, it's ok to not have a limit
|
||||
scanner.limit(self.limit.map(|limit| limit as i64), None)?;
|
||||
@@ -94,8 +114,8 @@ impl Query {
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `column` - The column name
|
||||
pub fn column(mut self, column: &str) -> Query {
|
||||
self.column = column.into();
|
||||
pub fn column(mut self, column: &str) -> Self {
|
||||
self.column = Some(column.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
@@ -104,18 +124,18 @@ impl Query {
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `limit` - The maximum number of results to return.
|
||||
pub fn limit(mut self, limit: usize) -> Query {
|
||||
pub fn limit(mut self, limit: usize) -> Self {
|
||||
self.limit = Some(limit);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the vector used for this query.
|
||||
/// Find the nearest vectors to the given query vector.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `vector` - The vector that will be used for search.
|
||||
pub fn query_vector(mut self, query_vector: Float32Array) -> Query {
|
||||
self.query_vector = Some(query_vector);
|
||||
pub fn nearest_to(mut self, vector: &[f32]) -> Self {
|
||||
self.query_vector = Some(Float32Array::from(vector.to_vec()));
|
||||
self
|
||||
}
|
||||
|
||||
@@ -124,7 +144,7 @@ impl Query {
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `nprobes` - The number of probes to use.
|
||||
pub fn nprobes(mut self, nprobes: usize) -> Query {
|
||||
pub fn nprobes(mut self, nprobes: usize) -> Self {
|
||||
self.nprobes = nprobes;
|
||||
self
|
||||
}
|
||||
@@ -134,8 +154,8 @@ impl Query {
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `refine_factor` - The refine factor to use.
|
||||
pub fn refine_factor(mut self, refine_factor: Option<u32>) -> Query {
|
||||
self.refine_factor = refine_factor;
|
||||
pub fn refine_factor(mut self, refine_factor: u32) -> Self {
|
||||
self.refine_factor = Some(refine_factor);
|
||||
self
|
||||
}
|
||||
|
||||
@@ -144,8 +164,8 @@ impl Query {
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `metric_type` - The distance metric to use. By default [MetricType::L2] is used.
|
||||
pub fn metric_type(mut self, metric_type: Option<MetricType>) -> Query {
|
||||
self.metric_type = metric_type;
|
||||
pub fn metric_type(mut self, metric_type: MetricType) -> Self {
|
||||
self.metric_type = Some(metric_type);
|
||||
self
|
||||
}
|
||||
|
||||
@@ -154,7 +174,7 @@ impl Query {
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `use_index` - Whether to use an ANN index if available.
|
||||
pub fn use_index(mut self, use_index: bool) -> Query {
|
||||
pub fn use_index(mut self, use_index: bool) -> Self {
|
||||
self.use_index = use_index;
|
||||
self
|
||||
}
|
||||
@@ -163,21 +183,21 @@ impl Query {
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `filter` - value A filter in the same format used by a sql WHERE clause.
|
||||
pub fn filter(mut self, filter: Option<String>) -> Query {
|
||||
self.filter = filter;
|
||||
/// * `filter` - SQL filter
|
||||
pub fn filter(mut self, filter: impl AsRef<str>) -> Self {
|
||||
self.filter = Some(filter.as_ref().to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// Return only the specified columns.
|
||||
///
|
||||
/// Only select the specified columns. If not specified, all columns will be returned.
|
||||
pub fn select(mut self, columns: Option<Vec<String>>) -> Query {
|
||||
self.select = columns;
|
||||
pub fn select(mut self, columns: &[impl AsRef<str>]) -> Self {
|
||||
self.select = Some(columns.iter().map(|c| c.as_ref().to_string()).collect());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn prefilter(mut self, prefilter: bool) -> Query {
|
||||
pub fn prefilter(mut self, prefilter: bool) -> Self {
|
||||
self.prefilter = prefilter;
|
||||
self
|
||||
}
|
||||
@@ -196,8 +216,10 @@ mod tests {
|
||||
use futures::StreamExt;
|
||||
use lance::dataset::Dataset;
|
||||
use lance_testing::datagen::{BatchGenerator, IncrementingInt32, RandomVector};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::query::Query;
|
||||
use crate::table::{NativeTable, Table};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_setters_getters() {
|
||||
@@ -205,18 +227,18 @@ mod tests {
|
||||
let ds = Dataset::write(batches, "memory://foo", None).await.unwrap();
|
||||
|
||||
let vector = Some(Float32Array::from_iter_values([0.1, 0.2]));
|
||||
let query = Query::new(Arc::new(ds), vector.clone());
|
||||
let query = Query::new(Arc::new(ds)).nearest_to(&[0.1, 0.2]);
|
||||
assert_eq!(query.query_vector, vector);
|
||||
|
||||
let new_vector = Float32Array::from_iter_values([9.8, 8.7]);
|
||||
|
||||
let query = query
|
||||
.query_vector(new_vector.clone())
|
||||
.nearest_to(&[9.8, 8.7])
|
||||
.limit(100)
|
||||
.nprobes(1000)
|
||||
.use_index(true)
|
||||
.metric_type(Some(MetricType::Cosine))
|
||||
.refine_factor(Some(999));
|
||||
.metric_type(MetricType::Cosine)
|
||||
.refine_factor(999);
|
||||
|
||||
assert_eq!(query.query_vector.unwrap(), new_vector);
|
||||
assert_eq!(query.limit.unwrap(), 100);
|
||||
@@ -231,14 +253,8 @@ mod tests {
|
||||
let batches = make_non_empty_batches();
|
||||
let ds = Arc::new(Dataset::write(batches, "memory://foo", None).await.unwrap());
|
||||
|
||||
let vector = Some(Float32Array::from_iter_values([0.1; 4]));
|
||||
|
||||
let query = Query::new(ds.clone(), vector.clone());
|
||||
let result = query
|
||||
.limit(10)
|
||||
.filter(Some("id % 2 == 0".to_string()))
|
||||
.execute()
|
||||
.await;
|
||||
let query = Query::new(ds.clone()).nearest_to(&[0.1; 4]);
|
||||
let result = query.limit(10).filter("id % 2 == 0").execute_stream().await;
|
||||
let mut stream = result.expect("should have result");
|
||||
// should only have one batch
|
||||
while let Some(batch) = stream.next().await {
|
||||
@@ -246,12 +262,12 @@ mod tests {
|
||||
assert!(batch.expect("should be Ok").num_rows() < 10);
|
||||
}
|
||||
|
||||
let query = Query::new(ds, vector.clone());
|
||||
let query = Query::new(ds).nearest_to(&[0.1; 4]);
|
||||
let result = query
|
||||
.limit(10)
|
||||
.filter(Some("id % 2 == 0".to_string()))
|
||||
.filter(String::from("id % 2 == 0")) // Work with String too
|
||||
.prefilter(true)
|
||||
.execute()
|
||||
.execute_stream()
|
||||
.await;
|
||||
let mut stream = result.expect("should have result");
|
||||
// should only have one batch
|
||||
@@ -267,11 +283,8 @@ mod tests {
|
||||
let batches = make_non_empty_batches();
|
||||
let ds = Arc::new(Dataset::write(batches, "memory://foo", None).await.unwrap());
|
||||
|
||||
let query = Query::new(ds.clone(), None);
|
||||
let result = query
|
||||
.filter(Some("id % 2 == 0".to_string()))
|
||||
.execute()
|
||||
.await;
|
||||
let query = Query::new(ds.clone());
|
||||
let result = query.filter("id % 2 == 0").execute_stream().await;
|
||||
let mut stream = result.expect("should have result");
|
||||
// should only have one batch
|
||||
while let Some(batch) = stream.next().await {
|
||||
@@ -309,4 +322,21 @@ mod tests {
|
||||
schema,
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_search() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let dataset_path = tmp_dir.path().join("test.lance");
|
||||
let uri = dataset_path.to_str().unwrap();
|
||||
|
||||
let batches = make_test_batches();
|
||||
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table = NativeTable::open(uri).await.unwrap();
|
||||
|
||||
let query = table.search(&[0.1, 0.2]);
|
||||
assert_eq!(&[0.1, 0.2], query.query_vector.unwrap().values());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2023 LanceDB Developers.
|
||||
// Copyright 2024 LanceDB Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@@ -12,72 +12,226 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! LanceDB Table APIs
|
||||
|
||||
use std::path::Path;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_schema::{Schema, SchemaRef};
|
||||
use chrono::Duration;
|
||||
use lance::dataset::builder::DatasetBuilder;
|
||||
use lance::index::scalar::ScalarIndexParams;
|
||||
use lance_index::optimize::OptimizeOptions;
|
||||
use lance_index::IndexType;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::{Float32Array, RecordBatchReader};
|
||||
use arrow_schema::SchemaRef;
|
||||
use lance::dataset::cleanup::RemovalStats;
|
||||
use lance::dataset::optimize::{
|
||||
compact_files, CompactionMetrics, CompactionOptions, IndexRemapperOptions,
|
||||
};
|
||||
pub use lance::dataset::ReadParams;
|
||||
use lance::dataset::{Dataset, UpdateBuilder, WriteParams};
|
||||
use lance::io::object_store::WrappingObjectStore;
|
||||
use lance_index::DatasetIndexExt;
|
||||
use std::path::Path;
|
||||
use lance::index::scalar::ScalarIndexParams;
|
||||
use lance::io::WrappingObjectStore;
|
||||
use lance_index::{optimize::OptimizeOptions, DatasetIndexExt, IndexType};
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::index::vector::{VectorIndex, VectorIndexBuilder, VectorIndexStatistics};
|
||||
use crate::index::vector::{VectorIndex, VectorIndexStatistics};
|
||||
use crate::index::IndexBuilder;
|
||||
use crate::query::Query;
|
||||
use crate::utils::{PatchReadParam, PatchWriteParam};
|
||||
use crate::WriteMode;
|
||||
|
||||
pub use lance::dataset::ReadParams;
|
||||
|
||||
pub const VECTOR_COLUMN_NAME: &str = "vector";
|
||||
|
||||
/// A Table is a collection of strongly typed rows.
|
||||
///
|
||||
/// The type of each row is defined by an Apache Arrow [Schema].
|
||||
#[async_trait::async_trait]
|
||||
pub trait Table: std::fmt::Display + Send + Sync {
|
||||
fn as_any(&self) -> &dyn std::any::Any;
|
||||
|
||||
/// Cast as [`NativeTable`], or return None if it is not a [`NativeTable`].
|
||||
fn as_native(&self) -> Option<&NativeTable>;
|
||||
|
||||
/// Get the name of the table.
|
||||
fn name(&self) -> &str;
|
||||
|
||||
/// Get the arrow [Schema] of the table.
|
||||
fn schema(&self) -> SchemaRef;
|
||||
|
||||
/// Count the number of rows in this dataset.
|
||||
async fn count_rows(&self) -> Result<usize>;
|
||||
|
||||
/// Insert new records into this Table
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `batches` RecordBatch to be saved in the Table
|
||||
/// * `params` Append / Overwrite existing records. Default: Append
|
||||
async fn add(
|
||||
&self,
|
||||
batches: Box<dyn RecordBatchReader + Send>,
|
||||
params: Option<WriteParams>,
|
||||
) -> Result<()>;
|
||||
|
||||
/// Delete the rows from table that match the predicate.
|
||||
///
|
||||
/// # Arguments
|
||||
/// - `predicate` - The SQL predicate string to filter the rows to be deleted.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use std::sync::Arc;
|
||||
/// # use vectordb::connection::{Database, Connection};
|
||||
/// # use arrow_array::{FixedSizeListArray, types::Float32Type, RecordBatch,
|
||||
/// # RecordBatchIterator, Int32Array};
|
||||
/// # use arrow_schema::{Schema, Field, DataType};
|
||||
/// # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
/// let tmpdir = tempfile::tempdir().unwrap();
|
||||
/// let db = Database::connect(tmpdir.path().to_str().unwrap()).await.unwrap();
|
||||
/// # let schema = Arc::new(Schema::new(vec![
|
||||
/// # Field::new("id", DataType::Int32, false),
|
||||
/// # Field::new("vector", DataType::FixedSizeList(
|
||||
/// # Arc::new(Field::new("item", DataType::Float32, true)), 128), true),
|
||||
/// # ]));
|
||||
/// let batches = RecordBatchIterator::new(vec![
|
||||
/// RecordBatch::try_new(schema.clone(),
|
||||
/// vec![
|
||||
/// Arc::new(Int32Array::from_iter_values(0..10)),
|
||||
/// Arc::new(FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
/// (0..10).map(|_| Some(vec![Some(1.0); 128])), 128)),
|
||||
/// ]).unwrap()
|
||||
/// ].into_iter().map(Ok),
|
||||
/// schema.clone());
|
||||
/// let tbl = db.create_table("delete_test", Box::new(batches), None).await.unwrap();
|
||||
/// tbl.delete("id > 5").await.unwrap();
|
||||
/// # });
|
||||
/// ```
|
||||
async fn delete(&self, predicate: &str) -> Result<()>;
|
||||
|
||||
/// Create an index on the column name.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use std::sync::Arc;
|
||||
/// # use vectordb::connection::{Database, Connection};
|
||||
/// # use arrow_array::{FixedSizeListArray, types::Float32Type, RecordBatch,
|
||||
/// # RecordBatchIterator, Int32Array};
|
||||
/// # use arrow_schema::{Schema, Field, DataType};
|
||||
/// # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
/// let tmpdir = tempfile::tempdir().unwrap();
|
||||
/// let db = Database::connect(tmpdir.path().to_str().unwrap()).await.unwrap();
|
||||
/// # let tbl = db.open_table("idx_test").await.unwrap();
|
||||
/// tbl.create_index(&["vector"])
|
||||
/// .ivf_pq()
|
||||
/// .num_partitions(256)
|
||||
/// .build()
|
||||
/// .await
|
||||
/// .unwrap();
|
||||
/// # });
|
||||
/// ```
|
||||
fn create_index(&self, column: &[&str]) -> IndexBuilder;
|
||||
|
||||
/// Search the table with a given query vector.
|
||||
///
|
||||
/// This is a convenience method for preparing an ANN query.
|
||||
fn search(&self, query: &[f32]) -> Query {
|
||||
self.query().nearest_to(query)
|
||||
}
|
||||
|
||||
/// Create a generic [`Query`] Builder.
|
||||
///
|
||||
/// When appropriate, various indices and statistics-based pruning will be used to
|
||||
/// accelerate the query.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ## Run a vector search (ANN) query.
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use arrow_array::RecordBatch;
|
||||
/// # use futures::TryStreamExt;
|
||||
/// # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
/// # let tbl = vectordb::table::NativeTable::open("/tmp/tbl").await.unwrap();
|
||||
/// let stream = tbl.query().nearest_to(&[1.0, 2.0, 3.0])
|
||||
/// .refine_factor(5)
|
||||
/// .nprobes(10)
|
||||
/// .execute_stream()
|
||||
/// .await
|
||||
/// .unwrap();
|
||||
/// let batches: Vec<RecordBatch> = stream.try_collect().await.unwrap();
|
||||
/// # });
|
||||
/// ```
|
||||
///
|
||||
/// ## Run a SQL-style filter
|
||||
/// ```no_run
|
||||
/// # use arrow_array::RecordBatch;
|
||||
/// # use futures::TryStreamExt;
|
||||
/// # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
/// # let tbl = vectordb::table::NativeTable::open("/tmp/tbl").await.unwrap();
|
||||
/// let stream = tbl
|
||||
/// .query()
|
||||
/// .filter("id > 5")
|
||||
/// .limit(1000)
|
||||
/// .execute_stream()
|
||||
/// .await
|
||||
/// .unwrap();
|
||||
/// let batches: Vec<RecordBatch> = stream.try_collect().await.unwrap();
|
||||
/// # });
|
||||
/// ```
|
||||
///
|
||||
/// ## Run a full scan query.
|
||||
/// ```no_run
|
||||
/// # use arrow_array::RecordBatch;
|
||||
/// # use futures::TryStreamExt;
|
||||
/// # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
/// # let tbl = vectordb::table::NativeTable::open("/tmp/tbl").await.unwrap();
|
||||
/// let stream = tbl
|
||||
/// .query()
|
||||
/// .execute_stream()
|
||||
/// .await
|
||||
/// .unwrap();
|
||||
/// let batches: Vec<RecordBatch> = stream.try_collect().await.unwrap();
|
||||
/// # });
|
||||
/// ```
|
||||
fn query(&self) -> Query;
|
||||
}
|
||||
|
||||
/// Reference to a Table pointer.
|
||||
pub type TableRef = Arc<dyn Table>;
|
||||
|
||||
/// A table in a LanceDB database.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Table {
|
||||
pub struct NativeTable {
|
||||
name: String,
|
||||
uri: String,
|
||||
dataset: Arc<Dataset>,
|
||||
dataset: Arc<Mutex<Dataset>>,
|
||||
|
||||
// the object store wrapper to use on write path
|
||||
store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Table {
|
||||
impl std::fmt::Display for NativeTable {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "Table({})", self.name)
|
||||
}
|
||||
}
|
||||
|
||||
impl Table {
|
||||
impl NativeTable {
|
||||
/// Opens an existing Table
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `uri` - The uri to a [Table]
|
||||
/// * `uri` - The uri to a [NativeTable]
|
||||
/// * `name` - The table name
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * A [Table] object.
|
||||
/// * A [NativeTable] object.
|
||||
pub async fn open(uri: &str) -> Result<Self> {
|
||||
let name = Self::get_table_name(uri)?;
|
||||
Self::open_with_params(uri, &name, None, ReadParams::default()).await
|
||||
}
|
||||
|
||||
/// Open a Table with a given name.
|
||||
pub async fn open_with_name(uri: &str, name: &str) -> Result<Self> {
|
||||
Self::open_with_params(uri, name, None, ReadParams::default()).await
|
||||
}
|
||||
|
||||
/// Opens an existing Table
|
||||
///
|
||||
/// # Arguments
|
||||
@@ -88,7 +242,7 @@ impl Table {
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * A [Table] object.
|
||||
/// * A [NativeTable] object.
|
||||
pub async fn open_with_params(
|
||||
uri: &str,
|
||||
name: &str,
|
||||
@@ -113,25 +267,26 @@ impl Table {
|
||||
message: e.to_string(),
|
||||
},
|
||||
})?;
|
||||
Ok(Table {
|
||||
Ok(NativeTable {
|
||||
name: name.to_string(),
|
||||
uri: uri.to_string(),
|
||||
dataset: Arc::new(dataset),
|
||||
dataset: Arc::new(Mutex::new(dataset)),
|
||||
store_wrapper: write_store_wrapper,
|
||||
})
|
||||
}
|
||||
|
||||
/// Checkout a specific version of this [`Table`]
|
||||
/// Make a new clone of the internal lance dataset.
|
||||
pub(crate) fn clone_inner_dataset(&self) -> Dataset {
|
||||
self.dataset.lock().expect("Lock poison").clone()
|
||||
}
|
||||
|
||||
/// Checkout a specific version of this [NativeTable]
|
||||
///
|
||||
pub async fn checkout(uri: &str, version: u64) -> Result<Self> {
|
||||
let name = Self::get_table_name(uri)?;
|
||||
Self::checkout_with_params(uri, &name, version, None, ReadParams::default()).await
|
||||
}
|
||||
|
||||
pub async fn checkout_with_name(uri: &str, name: &str, version: u64) -> Result<Self> {
|
||||
Self::checkout_with_params(uri, name, version, None, ReadParams::default()).await
|
||||
}
|
||||
|
||||
pub async fn checkout_with_params(
|
||||
uri: &str,
|
||||
name: &str,
|
||||
@@ -154,26 +309,27 @@ impl Table {
|
||||
message: e.to_string(),
|
||||
},
|
||||
})?;
|
||||
Ok(Table {
|
||||
Ok(NativeTable {
|
||||
name: name.to_string(),
|
||||
uri: uri.to_string(),
|
||||
dataset: Arc::new(dataset),
|
||||
dataset: Arc::new(Mutex::new(dataset)),
|
||||
store_wrapper: write_store_wrapper,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn checkout_latest(&self) -> Result<Self> {
|
||||
let latest_version_id = self.dataset.latest_version_id().await?;
|
||||
let dataset = if latest_version_id == self.dataset.version().version {
|
||||
self.dataset.clone()
|
||||
let dataset = self.clone_inner_dataset();
|
||||
let latest_version_id = dataset.latest_version_id().await?;
|
||||
let dataset = if latest_version_id == dataset.version().version {
|
||||
dataset
|
||||
} else {
|
||||
Arc::new(self.dataset.checkout_version(latest_version_id).await?)
|
||||
dataset.checkout_version(latest_version_id).await?
|
||||
};
|
||||
|
||||
Ok(Table {
|
||||
Ok(Self {
|
||||
name: self.name.clone(),
|
||||
uri: self.uri.clone(),
|
||||
dataset,
|
||||
dataset: Arc::new(Mutex::new(dataset)),
|
||||
store_wrapper: self.store_wrapper.clone(),
|
||||
})
|
||||
}
|
||||
@@ -203,8 +359,8 @@ impl Table {
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * A [Table] object.
|
||||
pub async fn create(
|
||||
/// * A [NativeTable] object.
|
||||
pub(crate) async fn create(
|
||||
uri: &str,
|
||||
name: &str,
|
||||
batches: impl RecordBatchReader + Send + 'static,
|
||||
@@ -227,46 +383,22 @@ impl Table {
|
||||
message: e.to_string(),
|
||||
},
|
||||
})?;
|
||||
Ok(Table {
|
||||
Ok(NativeTable {
|
||||
name: name.to_string(),
|
||||
uri: uri.to_string(),
|
||||
dataset: Arc::new(dataset),
|
||||
dataset: Arc::new(Mutex::new(dataset)),
|
||||
store_wrapper: write_store_wrapper,
|
||||
})
|
||||
}
|
||||
|
||||
/// Schema of this Table.
|
||||
pub fn schema(&self) -> SchemaRef {
|
||||
Arc::new(self.dataset.schema().into())
|
||||
}
|
||||
|
||||
/// Version of this Table
|
||||
pub fn version(&self) -> u64 {
|
||||
self.dataset.version().version
|
||||
}
|
||||
|
||||
/// Create index on the table.
|
||||
pub async fn create_index(&mut self, index_builder: &impl VectorIndexBuilder) -> Result<()> {
|
||||
let mut dataset = self.dataset.as_ref().clone();
|
||||
dataset
|
||||
.create_index(
|
||||
&[index_builder
|
||||
.get_column()
|
||||
.unwrap_or(VECTOR_COLUMN_NAME.to_string())
|
||||
.as_str()],
|
||||
IndexType::Vector,
|
||||
index_builder.get_index_name(),
|
||||
&index_builder.build(),
|
||||
index_builder.get_replace(),
|
||||
)
|
||||
.await?;
|
||||
self.dataset = Arc::new(dataset);
|
||||
Ok(())
|
||||
self.dataset.lock().expect("lock poison").version().version
|
||||
}
|
||||
|
||||
/// Create a scalar index on the table
|
||||
pub async fn create_scalar_index(&mut self, column: &str, replace: bool) -> Result<()> {
|
||||
let mut dataset = self.dataset.as_ref().clone();
|
||||
pub async fn create_scalar_index(&self, column: &str, replace: bool) -> Result<()> {
|
||||
let mut dataset = self.clone_inner_dataset();
|
||||
let params = ScalarIndexParams::default();
|
||||
dataset
|
||||
.create_index(&[column], IndexType::Scalar, None, ¶ms, replace)
|
||||
@@ -275,61 +407,21 @@ impl Table {
|
||||
}
|
||||
|
||||
pub async fn optimize_indices(&mut self, options: &OptimizeOptions) -> Result<()> {
|
||||
let mut dataset = self.dataset.as_ref().clone();
|
||||
let mut dataset = self.clone_inner_dataset();
|
||||
dataset.optimize_indices(options).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Insert records into this Table
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `batches` RecordBatch to be saved in the Table
|
||||
/// * `write_mode` Append / Overwrite existing records. Default: Append
|
||||
/// # Returns
|
||||
///
|
||||
/// * The number of rows added
|
||||
pub async fn add(
|
||||
&mut self,
|
||||
batches: impl RecordBatchReader + Send + 'static,
|
||||
params: Option<WriteParams>,
|
||||
) -> Result<()> {
|
||||
let params = Some(params.unwrap_or(WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
..WriteParams::default()
|
||||
}));
|
||||
|
||||
// patch the params if we have a write store wrapper
|
||||
let params = match self.store_wrapper.clone() {
|
||||
Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
|
||||
None => params,
|
||||
};
|
||||
|
||||
self.dataset = Arc::new(Dataset::write(batches, &self.uri, params).await?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Creates a new Query object that can be executed.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query_vector` The vector used for this query.
|
||||
///
|
||||
/// # Returns
|
||||
/// * A [Query] object.
|
||||
pub fn search<T: Into<Float32Array>>(&self, query_vector: Option<T>) -> Query {
|
||||
Query::new(self.dataset.clone(), query_vector.map(|q| q.into()))
|
||||
pub fn query(&self) -> Query {
|
||||
Query::new(self.clone_inner_dataset().into())
|
||||
}
|
||||
|
||||
pub fn filter(&self, expr: String) -> Query {
|
||||
Query::new(self.dataset.clone(), None).filter(Some(expr))
|
||||
Query::new(self.clone_inner_dataset().into()).filter(expr)
|
||||
}
|
||||
|
||||
/// Returns the number of rows in this Table
|
||||
pub async fn count_rows(&self) -> Result<usize> {
|
||||
Ok(self.dataset.count_rows().await?)
|
||||
}
|
||||
|
||||
/// Merge new data into this table.
|
||||
pub async fn merge(
|
||||
@@ -338,26 +430,14 @@ impl Table {
|
||||
left_on: &str,
|
||||
right_on: &str,
|
||||
) -> Result<()> {
|
||||
let mut dataset = self.dataset.as_ref().clone();
|
||||
let mut dataset = self.clone_inner_dataset();
|
||||
dataset.merge(batches, left_on, right_on).await?;
|
||||
self.dataset = Arc::new(dataset);
|
||||
self.dataset = Arc::new(Mutex::new(dataset));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete rows from the table
|
||||
pub async fn delete(&mut self, predicate: &str) -> Result<()> {
|
||||
let mut dataset = self.dataset.as_ref().clone();
|
||||
dataset.delete(predicate).await?;
|
||||
self.dataset = Arc::new(dataset);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn update(
|
||||
&mut self,
|
||||
predicate: Option<&str>,
|
||||
updates: Vec<(&str, &str)>,
|
||||
) -> Result<()> {
|
||||
let mut builder = UpdateBuilder::new(self.dataset.clone());
|
||||
pub async fn update(&self, predicate: Option<&str>, updates: Vec<(&str, &str)>) -> Result<()> {
|
||||
let mut builder = UpdateBuilder::new(self.clone_inner_dataset().into());
|
||||
if let Some(predicate) = predicate {
|
||||
builder = builder.update_where(predicate)?;
|
||||
}
|
||||
@@ -367,9 +447,8 @@ impl Table {
|
||||
}
|
||||
|
||||
let operation = builder.build()?;
|
||||
let new_ds = operation.execute().await?;
|
||||
self.dataset = new_ds;
|
||||
|
||||
let ds = operation.execute().await?;
|
||||
self.reset_dataset(ds.as_ref().clone());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -389,8 +468,8 @@ impl Table {
|
||||
older_than: Duration,
|
||||
delete_unverified: Option<bool>,
|
||||
) -> Result<RemovalStats> {
|
||||
Ok(self
|
||||
.dataset
|
||||
let dataset = self.clone_inner_dataset();
|
||||
Ok(dataset
|
||||
.cleanup_old_versions(older_than, delete_unverified)
|
||||
.await?)
|
||||
}
|
||||
@@ -402,26 +481,28 @@ impl Table {
|
||||
///
|
||||
/// This calls into [lance::dataset::optimize::compact_files].
|
||||
pub async fn compact_files(
|
||||
&mut self,
|
||||
&self,
|
||||
options: CompactionOptions,
|
||||
remap_options: Option<Arc<dyn IndexRemapperOptions>>,
|
||||
) -> Result<CompactionMetrics> {
|
||||
let mut dataset = self.dataset.as_ref().clone();
|
||||
let mut dataset = self.clone_inner_dataset();
|
||||
let metrics = compact_files(&mut dataset, options, remap_options).await?;
|
||||
self.dataset = Arc::new(dataset);
|
||||
self.reset_dataset(dataset);
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
pub fn count_fragments(&self) -> usize {
|
||||
self.dataset.count_fragments()
|
||||
self.dataset.lock().expect("lock poison").count_fragments()
|
||||
}
|
||||
|
||||
pub async fn count_deleted_rows(&self) -> Result<usize> {
|
||||
Ok(self.dataset.count_deleted_rows().await?)
|
||||
let dataset = self.clone_inner_dataset();
|
||||
Ok(dataset.count_deleted_rows().await?)
|
||||
}
|
||||
|
||||
pub async fn num_small_files(&self, max_rows_per_group: usize) -> usize {
|
||||
self.dataset.num_small_files(max_rows_per_group).await
|
||||
let dataset = self.clone_inner_dataset();
|
||||
dataset.num_small_files(max_rows_per_group).await
|
||||
}
|
||||
|
||||
pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
|
||||
@@ -439,8 +520,8 @@ impl Table {
|
||||
}
|
||||
|
||||
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
|
||||
let (indices, mf) =
|
||||
futures::try_join!(self.dataset.load_indices(), self.dataset.latest_manifest())?;
|
||||
let dataset = self.clone_inner_dataset();
|
||||
let (indices, mf) = futures::try_join!(dataset.load_indices(), dataset.latest_manifest())?;
|
||||
Ok(indices
|
||||
.iter()
|
||||
.map(|i| VectorIndex::new_from_format(&mf, i))
|
||||
@@ -456,10 +537,8 @@ impl Table {
|
||||
if index.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
let index_stats = self
|
||||
.dataset
|
||||
.index_statistics(&index.unwrap().index_name)
|
||||
.await?;
|
||||
let dataset = self.clone_inner_dataset();
|
||||
let index_stats = dataset.index_statistics(&index.unwrap().index_name).await?;
|
||||
let index_stats: VectorIndexStatistics =
|
||||
serde_json::from_str(&index_stats).map_err(|e| Error::Lance {
|
||||
message: format!(
|
||||
@@ -470,6 +549,71 @@ impl Table {
|
||||
|
||||
Ok(Some(index_stats))
|
||||
}
|
||||
|
||||
pub(crate) fn reset_dataset(&self, dataset: Dataset) {
|
||||
*self.dataset.lock().expect("lock poison") = dataset;
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Table for NativeTable {
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_native(&self) -> Option<&NativeTable> {
|
||||
Some(self)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
self.name.as_str()
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
let lance_schema = { self.dataset.lock().expect("lock poison").schema().clone() };
|
||||
Arc::new(Schema::from(&lance_schema))
|
||||
}
|
||||
|
||||
async fn count_rows(&self) -> Result<usize> {
|
||||
let dataset = { self.dataset.lock().expect("lock poison").clone() };
|
||||
Ok(dataset.count_rows().await?)
|
||||
}
|
||||
|
||||
async fn add(
|
||||
&self,
|
||||
batches: Box<dyn RecordBatchReader + Send>,
|
||||
params: Option<WriteParams>,
|
||||
) -> Result<()> {
|
||||
let params = Some(params.unwrap_or(WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
..WriteParams::default()
|
||||
}));
|
||||
|
||||
// patch the params if we have a write store wrapper
|
||||
let params = match self.store_wrapper.clone() {
|
||||
Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
|
||||
None => params,
|
||||
};
|
||||
|
||||
self.reset_dataset(Dataset::write(batches, &self.uri, params).await?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_index(&self, columns: &[&str]) -> IndexBuilder {
|
||||
IndexBuilder::new(Arc::new(self.clone()), columns)
|
||||
}
|
||||
|
||||
fn query(&self) -> Query {
|
||||
Query::new(Arc::new(self.dataset.lock().expect("lock poison").clone()))
|
||||
}
|
||||
|
||||
/// Delete rows from the table
|
||||
async fn delete(&self, predicate: &str) -> Result<()> {
|
||||
let mut dataset = self.clone_inner_dataset();
|
||||
dataset.delete(predicate).await?;
|
||||
self.reset_dataset(dataset);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -487,14 +631,11 @@ mod tests {
|
||||
use arrow_schema::{DataType, Field, Schema, TimeUnit};
|
||||
use futures::TryStreamExt;
|
||||
use lance::dataset::{Dataset, WriteMode};
|
||||
use lance::index::vector::pq::PQBuildParams;
|
||||
use lance::io::object_store::{ObjectStoreParams, WrappingObjectStore};
|
||||
use lance_index::vector::ivf::IvfBuildParams;
|
||||
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
||||
use rand::Rng;
|
||||
use tempfile::tempdir;
|
||||
|
||||
use super::*;
|
||||
use crate::index::vector::IvfPQIndexBuilder;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_open() {
|
||||
@@ -506,7 +647,9 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table = Table::open(dataset_path.to_str().unwrap()).await.unwrap();
|
||||
let table = NativeTable::open(dataset_path.to_str().unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(table.name, "test")
|
||||
}
|
||||
@@ -515,7 +658,7 @@ mod tests {
|
||||
async fn test_open_not_found() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let table = Table::open(uri).await;
|
||||
let table = NativeTable::open(uri).await;
|
||||
assert!(matches!(table.unwrap_err(), Error::TableNotFound { .. }));
|
||||
}
|
||||
|
||||
@@ -535,12 +678,12 @@ mod tests {
|
||||
|
||||
let batches = make_test_batches();
|
||||
let _ = batches.schema().clone();
|
||||
Table::create(&uri, "test", batches, None, None)
|
||||
NativeTable::create(&uri, "test", batches, None, None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let batches = make_test_batches();
|
||||
let result = Table::create(&uri, "test", batches, None, None).await;
|
||||
let result = NativeTable::create(&uri, "test", batches, None, None).await;
|
||||
assert!(matches!(
|
||||
result.unwrap_err(),
|
||||
Error::TableAlreadyExists { .. }
|
||||
@@ -554,7 +697,7 @@ mod tests {
|
||||
|
||||
let batches = make_test_batches();
|
||||
let schema = batches.schema().clone();
|
||||
let mut table = Table::create(&uri, "test", batches, None, None)
|
||||
let table = NativeTable::create(&uri, "test", batches, None, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table.count_rows().await.unwrap(), 10);
|
||||
@@ -570,7 +713,7 @@ mod tests {
|
||||
schema.clone(),
|
||||
);
|
||||
|
||||
table.add(new_batches, None).await.unwrap();
|
||||
table.add(Box::new(new_batches), None).await.unwrap();
|
||||
assert_eq!(table.count_rows().await.unwrap(), 20);
|
||||
assert_eq!(table.name, "test");
|
||||
}
|
||||
@@ -582,7 +725,7 @@ mod tests {
|
||||
|
||||
let batches = make_test_batches();
|
||||
let schema = batches.schema().clone();
|
||||
let mut table = Table::create(uri, "test", batches, None, None)
|
||||
let table = NativeTable::create(uri, "test", batches, None, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table.count_rows().await.unwrap(), 10);
|
||||
@@ -603,7 +746,7 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
table.add(new_batches, Some(param)).await.unwrap();
|
||||
table.add(Box::new(new_batches), Some(param)).await.unwrap();
|
||||
assert_eq!(table.count_rows().await.unwrap(), 10);
|
||||
assert_eq!(table.name, "test");
|
||||
}
|
||||
@@ -636,7 +779,7 @@ mod tests {
|
||||
);
|
||||
|
||||
Dataset::write(record_batch_iter, uri, None).await.unwrap();
|
||||
let mut table = Table::open(uri).await.unwrap();
|
||||
let table = NativeTable::open(uri).await.unwrap();
|
||||
|
||||
table
|
||||
.update(Some("id > 5"), vec![("name", "'foo'")])
|
||||
@@ -768,7 +911,7 @@ mod tests {
|
||||
);
|
||||
|
||||
Dataset::write(record_batch_iter, uri, None).await.unwrap();
|
||||
let mut table = Table::open(uri).await.unwrap();
|
||||
let table = NativeTable::open(uri).await.unwrap();
|
||||
|
||||
// check it can do update for each type
|
||||
let updates: Vec<(&str, &str)> = vec![
|
||||
@@ -874,24 +1017,6 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_search() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let dataset_path = tmp_dir.path().join("test.lance");
|
||||
let uri = dataset_path.to_str().unwrap();
|
||||
|
||||
let batches = make_test_batches();
|
||||
Dataset::write(batches, dataset_path.to_str().unwrap(), None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table = Table::open(uri).await.unwrap();
|
||||
|
||||
let vector = Float32Array::from_iter_values([0.1, 0.2]);
|
||||
let query = table.search(Some(vector.clone()));
|
||||
assert_eq!(vector, query.query_vector.unwrap());
|
||||
}
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
struct NoOpCacheWrapper {
|
||||
called: AtomicBool,
|
||||
@@ -933,7 +1058,7 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
assert!(!wrapper.called());
|
||||
let _ = Table::open_with_params(uri, "test", None, param)
|
||||
let _ = NativeTable::open_with_params(uri, "test", None, param)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(wrapper.called());
|
||||
@@ -987,23 +1112,23 @@ mod tests {
|
||||
schema,
|
||||
);
|
||||
|
||||
let mut table = Table::create(uri, "test", batches, None, None)
|
||||
let table = NativeTable::create(uri, "test", batches, None, None)
|
||||
.await
|
||||
.unwrap();
|
||||
let mut i = IvfPQIndexBuilder::new();
|
||||
|
||||
assert_eq!(table.count_indexed_rows("my_index").await.unwrap(), None);
|
||||
assert_eq!(table.count_unindexed_rows("my_index").await.unwrap(), None);
|
||||
|
||||
let index_builder = i
|
||||
.column("embeddings".to_string())
|
||||
.index_name("my_index".to_string())
|
||||
.ivf_params(IvfBuildParams::new(256))
|
||||
.pq_params(PQBuildParams::default());
|
||||
table
|
||||
.create_index(&["embeddings"])
|
||||
.ivf_pq()
|
||||
.name("my_index")
|
||||
.num_partitions(256)
|
||||
.build()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
table.create_index(index_builder).await.unwrap();
|
||||
|
||||
assert_eq!(table.dataset.load_indices().await.unwrap().len(), 1);
|
||||
assert_eq!(table.load_indices().await.unwrap().len(), 1);
|
||||
assert_eq!(table.count_rows().await.unwrap(), 512);
|
||||
assert_eq!(table.name, "test");
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use lance::{
|
||||
dataset::{ReadParams, WriteParams},
|
||||
io::object_store::{ObjectStoreParams, WrappingObjectStore},
|
||||
};
|
||||
use arrow_schema::Schema;
|
||||
|
||||
use lance::dataset::{ReadParams, WriteParams};
|
||||
use lance::io::{ObjectStoreParams, WrappingObjectStore};
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
|
||||
@@ -65,3 +65,86 @@ impl PatchReadParam for ReadParams {
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the single default vector column on which to create an index.
|
||||
pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result<String> {
|
||||
// Try to find one fixed size list array column.
|
||||
let candidates = schema
|
||||
.fields()
|
||||
.iter()
|
||||
.filter_map(|field| match field.data_type() {
|
||||
arrow_schema::DataType::FixedSizeList(f, d)
|
||||
if f.data_type().is_floating()
|
||||
&& dim.map(|expect| *d == expect).unwrap_or(true) =>
|
||||
{
|
||||
Some(field.name())
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
if candidates.is_empty() {
|
||||
Err(Error::Store {
|
||||
message: "No vector column found to create index".to_string(),
|
||||
})
|
||||
} else if candidates.len() != 1 {
|
||||
Err(Error::Store {
|
||||
message: format!(
|
||||
"More than one vector columns found, \
|
||||
please specify which column to create index: {:?}",
|
||||
candidates
|
||||
),
|
||||
})
|
||||
} else {
|
||||
Ok(candidates[0].to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use arrow_schema::{DataType, Field};
|
||||
|
||||
#[test]
|
||||
fn test_guess_default_column() {
|
||||
let schema_no_vector = Schema::new(vec![
|
||||
Field::new("id", DataType::Int16, true),
|
||||
Field::new("tag", DataType::Utf8, false),
|
||||
]);
|
||||
assert!(default_vector_column(&schema_no_vector, None)
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("No vector column"));
|
||||
|
||||
let schema_with_vec_col = Schema::new(vec![
|
||||
Field::new("id", DataType::Int16, true),
|
||||
Field::new(
|
||||
"vec",
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float64, false)), 10),
|
||||
false,
|
||||
),
|
||||
]);
|
||||
assert_eq!(
|
||||
default_vector_column(&schema_with_vec_col, None).unwrap(),
|
||||
"vec"
|
||||
);
|
||||
|
||||
let multi_vec_col = Schema::new(vec![
|
||||
Field::new("id", DataType::Int16, true),
|
||||
Field::new(
|
||||
"vec",
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float64, false)), 10),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
"vec2",
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float64, false)), 50),
|
||||
false,
|
||||
),
|
||||
]);
|
||||
assert!(default_vector_column(&multi_vec_col, None)
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("More than one"));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user