mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
docs(nodejs): add @lancedb/lancedb examples everywhere (#1411)
Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -4,6 +4,7 @@
|
||||
**/__pycache__
|
||||
.DS_Store
|
||||
venv
|
||||
.venv
|
||||
|
||||
.vscode
|
||||
.zed
|
||||
|
||||
@@ -18,4 +18,4 @@ repos:
|
||||
language: system
|
||||
types: [text]
|
||||
files: "nodejs/.*"
|
||||
exclude: nodejs/lancedb/native.d.ts|nodejs/dist/.*
|
||||
exclude: nodejs/lancedb/native.d.ts|nodejs/dist/.*|nodejs/examples/.*
|
||||
|
||||
@@ -38,7 +38,21 @@ Lance supports `IVF_PQ` index type by default.
|
||||
tbl.create_index(num_partitions=256, num_sub_vectors=96)
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "TypeScript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
Creating indexes is done via the [lancedb.Table.createIndex](../js/classes/Table.md/#createIndex) method.
|
||||
|
||||
```typescript
|
||||
--8<--- "nodejs/examples/ann_indexes.ts:import"
|
||||
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:ingest"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
Creating indexes is done via the [lancedb.Table.createIndex](../javascript/interfaces/Table.md/#createIndex) method.
|
||||
|
||||
```typescript
|
||||
--8<--- "docs/src/ann_indexes.ts:import"
|
||||
@@ -150,7 +164,15 @@ There are a couple of parameters that can be used to fine-tune the search:
|
||||
1 [0.48587373, 0.269207, 0.15095535, 0.65531915,... item 3953 108.393867
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "TypeScript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:search1"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/ann_indexes.ts:search1"
|
||||
@@ -176,7 +198,15 @@ You can further filter the elements returned by a search using a where clause.
|
||||
tbl.search(np.random.random((1536))).where("item != 'item 1141'").to_pandas()
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "TypeScript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:search2"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```javascript
|
||||
--8<-- "docs/src/ann_indexes.ts:search2"
|
||||
@@ -200,7 +230,15 @@ You can select the columns returned by the query using a select clause.
|
||||
...
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "TypeScript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:search3"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/ann_indexes.ts:search3"
|
||||
|
||||
@@ -16,12 +16,43 @@
|
||||
pip install lancedb
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```shell
|
||||
npm install @lancedb/lancedb
|
||||
```
|
||||
!!! note "Bundling `@lancedb/lancedb` apps with Webpack"
|
||||
|
||||
Since LanceDB contains a prebuilt Node binary, you must configure `next.config.js` to exclude it from webpack. This is required for both using Next.js and deploying a LanceDB app on Vercel.
|
||||
|
||||
```javascript
|
||||
/** @type {import('next').NextConfig} */
|
||||
module.exports = ({
|
||||
webpack(config) {
|
||||
config.externals.push({ '@lancedb/lancedb': '@lancedb/lancedb' })
|
||||
return config;
|
||||
}
|
||||
})
|
||||
```
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```shell
|
||||
npm install vectordb
|
||||
```
|
||||
!!! note "Bundling `vectordb` apps with Webpack"
|
||||
|
||||
Since LanceDB contains a prebuilt Node binary, you must configure `next.config.js` to exclude it from webpack. This is required for both using Next.js and deploying a LanceDB app on Vercel.
|
||||
|
||||
```javascript
|
||||
/** @type {import('next').NextConfig} */
|
||||
module.exports = ({
|
||||
webpack(config) {
|
||||
config.externals.push({ vectordb: 'vectordb' })
|
||||
return config;
|
||||
}
|
||||
})
|
||||
```
|
||||
=== "Rust"
|
||||
|
||||
```shell
|
||||
@@ -58,7 +89,14 @@ recommend switching to stable releases.
|
||||
pip install --pre --extra-index-url https://pypi.fury.io/lancedb/ lancedb
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```shell
|
||||
npm install @lancedb/lancedb@preview
|
||||
```
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```shell
|
||||
npm install vectordb@preview
|
||||
@@ -93,23 +131,22 @@ recommend switching to stable releases.
|
||||
use the same syntax as the asynchronous API. To help with this migration we
|
||||
have created a [migration guide](migration.md) detailing the differences.
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:import"
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import * as arrow from "apache-arrow";
|
||||
|
||||
--8<-- "docs/src/basic_legacy.ts:open_db"
|
||||
--8<-- "nodejs/examples/basic.ts:connect"
|
||||
```
|
||||
|
||||
!!! note "`@lancedb/lancedb` vs. `vectordb`"
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
The Javascript SDK was originally released as `vectordb`. In an effort to
|
||||
reduce maintenance we are aligning our SDKs. The new, aligned, Javascript
|
||||
API is being released as `lancedb`. If you are starting new work we encourage
|
||||
you to try out `lancedb`. Once the new API is feature complete we will begin
|
||||
slowly deprecating `vectordb` in favor of `lancedb`. There is a
|
||||
[migration guide](migration.md) detailing the differences which will assist
|
||||
you in this process.
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:open_db"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -152,14 +189,22 @@ table.
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_table"
|
||||
```
|
||||
|
||||
If the table already exists, LanceDB will raise an error by default.
|
||||
If you want to overwrite the table, you can pass in `mode="overwrite"`
|
||||
If you want to overwrite the table, you can pass in `mode:"overwrite"`
|
||||
to the `createTable` function.
|
||||
|
||||
=== "Rust"
|
||||
@@ -200,7 +245,15 @@ similar to a `CREATE TABLE` statement in SQL.
|
||||
!!! note "You can define schema in Pydantic"
|
||||
LanceDB comes with Pydantic support, which allows you to define the schema of your data using Pydantic models. This makes it easy to work with LanceDB tables and data. Learn more about all supported types in [tables guide](./guides/tables.md).
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_empty_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_empty_table"
|
||||
@@ -223,12 +276,20 @@ Once created, you can open a table as follows:
|
||||
--8<-- "python/python/tests/docs/test_basic.py:open_table_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:open_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
const tbl = await db.openTable("myTable");
|
||||
```
|
||||
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
@@ -244,9 +305,16 @@ If you forget the name of your table, you can always get a listing of all table
|
||||
--8<-- "python/python/tests/docs/test_basic.py:table_names_async"
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```javascript
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:table_names"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
console.log(await db.tableNames());
|
||||
```
|
||||
|
||||
@@ -267,7 +335,14 @@ After a table has been created, you can always add more data to it as follows:
|
||||
--8<-- "python/python/tests/docs/test_basic.py:add_data_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:add_data"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:add"
|
||||
@@ -292,7 +367,14 @@ Once you've embedded the query, you can find its nearest neighbors as follows:
|
||||
|
||||
This returns a pandas DataFrame with the results.
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:vector_search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:search"
|
||||
@@ -325,7 +407,14 @@ LanceDB allows you to create an ANN index on a table as follows:
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_index_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_index"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```{.typescript .ignore}
|
||||
--8<-- "docs/src/basic_legacy.ts:create_index"
|
||||
@@ -357,7 +446,15 @@ This can delete any number of rows that match the filter.
|
||||
--8<-- "python/python/tests/docs/test_basic.py:delete_rows_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:delete_rows"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:delete"
|
||||
@@ -378,7 +475,13 @@ simple or complex as needed. To see what expressions are supported, see the
|
||||
|
||||
Read more: [lancedb.table.Table.delete][]
|
||||
|
||||
=== "Javascript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
Read more: [lancedb.Table.delete](js/classes/Table.md#delete)
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
Read more: [vectordb.Table.delete](javascript/interfaces/Table.md#delete)
|
||||
|
||||
@@ -401,7 +504,15 @@ Use the `drop_table()` method on the database to remove a table.
|
||||
By default, if the table does not exist an exception is raised. To suppress this,
|
||||
you can pass in `ignore_missing=True`.
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:drop_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:drop_table"
|
||||
@@ -416,19 +527,6 @@ Use the `drop_table()` method on the database to remove a table.
|
||||
--8<-- "rust/lancedb/examples/simple.rs:drop_table"
|
||||
```
|
||||
|
||||
!!! note "Bundling `vectordb` apps with Webpack"
|
||||
|
||||
If you're using the `vectordb` module in JavaScript, since LanceDB contains a prebuilt Node binary, you must configure `next.config.js` to exclude it from webpack. This is required for both using Next.js and deploying a LanceDB app on Vercel.
|
||||
|
||||
```javascript
|
||||
/** @type {import('next').NextConfig} */
|
||||
module.exports = ({
|
||||
webpack(config) {
|
||||
config.externals.push({ vectordb: 'vectordb' })
|
||||
return config;
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
## Using the Embedding API
|
||||
You can use the embedding API when working with embedding models. It automatically vectorizes the data at ingestion and query time and comes with built-in integrations with popular embedding models like OpenAI, Hugging Face, Sentence Transformers, CLIP and more.
|
||||
@@ -440,6 +538,22 @@ You can use the embedding API when working with embedding models. It automatical
|
||||
--8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings"
|
||||
```
|
||||
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/embedding.ts:imports"
|
||||
--8<-- "nodejs/examples/embedding.ts:openai_embeddings"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
--8<-- "rust/lancedb/examples/openai.rs:imports"
|
||||
--8<-- "rust/lancedb/examples/openai.rs:openai_embeddings"
|
||||
```
|
||||
|
||||
Learn about using the existing integrations and creating custom embedding functions in the [embedding API guide](./embeddings/).
|
||||
|
||||
|
||||
@@ -448,3 +562,5 @@ Learn about using the existing integrations and creating custom embedding functi
|
||||
This section covered the very basics of using LanceDB. If you're learning about vector databases for the first time, you may want to read the page on [indexing](concepts/index_ivfpq.md) to get familiar with the concepts.
|
||||
|
||||
If you've already worked with other vector databases, you may want to read the [guides](guides/tables.md) to learn how to work with LanceDB in more detail.
|
||||
|
||||
[^1]: The `vectordb` package is a legacy package that is deprecated in favor of `@lancedb/lancedb`. The `vectordb` package will continue to receive bug fixes and security updates until September 2024. We recommend all new projects use `@lancedb/lancedb`. See the [migration guide](migration.md) for more information.
|
||||
|
||||
@@ -24,6 +24,7 @@ const example = async () => {
|
||||
);
|
||||
// --8<-- [end:create_table]
|
||||
|
||||
|
||||
// --8<-- [start:add]
|
||||
const newData = Array.from({ length: 500 }, (_, i) => ({
|
||||
vector: [i, i + 1],
|
||||
|
||||
@@ -29,17 +29,32 @@ For this purpose, LanceDB introduces an **embedding functions API**, that allow
|
||||
You can also define your own embedding function by implementing the `EmbeddingFunction`
|
||||
abstract base interface. It subclasses Pydantic Model which can be utilized to write complex schemas simply as we'll see next!
|
||||
|
||||
=== "JavaScript""
|
||||
=== "TypeScript"
|
||||
In the TypeScript SDK, the choices are more limited. For now, only the OpenAI
|
||||
embedding function is available.
|
||||
|
||||
```javascript
|
||||
const lancedb = require("vectordb");
|
||||
import * as lancedb from '@lancedb/lancedb'
|
||||
import { getRegistry } from '@lancedb/lancedb/embeddings'
|
||||
|
||||
// You need to provide an OpenAI API key
|
||||
const apiKey = "sk-..."
|
||||
// The embedding function will create embeddings for the 'text' column
|
||||
const embedding = new lancedb.OpenAIEmbeddingFunction('text', apiKey)
|
||||
const func = getRegistry().get("openai").create({apiKey})
|
||||
```
|
||||
=== "Rust"
|
||||
In the Rust SDK, the choices are more limited. For now, only the OpenAI
|
||||
embedding function is available. But unlike the Python and TypeScript SDKs, you need to manually register the OpenAI embedding function.
|
||||
|
||||
```toml
|
||||
# Make sure to include the `openai` feature
|
||||
[dependencies]
|
||||
lancedb = {version = "*", features = ["openai"]}
|
||||
```
|
||||
|
||||
```rust
|
||||
--8<-- "rust/lancedb/examples/openai.rs:imports"
|
||||
--8<-- "rust/lancedb/examples/openai.rs:openai_embeddings"
|
||||
```
|
||||
|
||||
## 2. Define the data model or schema
|
||||
@@ -55,7 +70,7 @@ For this purpose, LanceDB introduces an **embedding functions API**, that allow
|
||||
|
||||
`VectorField` tells LanceDB to use the clip embedding function to generate query embeddings for the `vector` column and `SourceField` ensures that when adding data, we automatically use the specified embedding function to encode `image_uri`.
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
For the TypeScript SDK, a schema can be inferred from input data, or an explicit
|
||||
Arrow schema can be provided.
|
||||
@@ -74,9 +89,18 @@ the embeddings at all:
|
||||
table.add([{"image_uri": u} for u in uris])
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/embedding.ts:imports"
|
||||
--8<-- "nodejs/examples/embedding.ts:embedding_function"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
const data = [
|
||||
{ text: "pepperoni"},
|
||||
@@ -116,9 +140,19 @@ need to worry about it when you query the table:
|
||||
|
||||
Both of the above snippets return a pandas DataFrame with the 10 closest vectors to the query.
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
const results = await table.search("What's the best pizza topping?")
|
||||
.limit(10)
|
||||
.toArray()
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const results = await table
|
||||
.search("What's the best pizza topping?")
|
||||
.limit(10)
|
||||
|
||||
@@ -7,7 +7,7 @@ LanceDB supports 3 methods of working with embeddings.
|
||||
|
||||
1. You can manually generate embeddings for the data and queries. This is done outside of LanceDB.
|
||||
2. You can use the built-in [embedding functions](./embedding_functions.md) to embed the data and queries in the background.
|
||||
3. For python users, you can define your own [custom embedding function](./custom_embedding_function.md)
|
||||
3. You can define your own [custom embedding function](./custom_embedding_function.md)
|
||||
that extends the default embedding functions.
|
||||
|
||||
For Python users, there is also a legacy [with_embeddings API](./legacy.md).
|
||||
@@ -18,8 +18,11 @@ It is retained for compatibility and will be removed in a future version.
|
||||
To get started with embeddings, you can use the built-in embedding functions.
|
||||
|
||||
### OpenAI Embedding function
|
||||
|
||||
LanceDB registers the OpenAI embeddings function in the registry as `openai`. You can pass any supported model name to the `create`. By default it uses `"text-embedding-ada-002"`.
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
@@ -45,9 +48,24 @@ actual = table.search(query).limit(1).to_pydantic(Words)[0]
|
||||
print(actual.text)
|
||||
```
|
||||
|
||||
=== "TypeScript"
|
||||
|
||||
```typescript
|
||||
--8<--- "nodejs/examples/embedding.ts:imports"
|
||||
--8<--- "nodejs/examples/embedding.ts:openai_embeddings"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
--8<--- "rust/lancedb/examples/openai.rs:imports"
|
||||
--8<--- "rust/lancedb/examples/openai.rs:openai_embeddings"
|
||||
```
|
||||
|
||||
### Sentence Transformers Embedding function
|
||||
LanceDB registers the Sentence Transformers embeddings function in the registry as `sentence-transformers`. You can pass any supported model name to the `create`. By default it uses `"sentence-transformers/paraphrase-MiniLM-L6-v2"`.
|
||||
|
||||
=== "Python"
|
||||
```python
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
@@ -73,7 +91,16 @@ actual = table.search(query).limit(1).to_pydantic(Words)[0]
|
||||
print(actual.text)
|
||||
```
|
||||
|
||||
=== "TypeScript"
|
||||
|
||||
Coming Soon!
|
||||
|
||||
=== "Rust"
|
||||
|
||||
Coming Soon!
|
||||
|
||||
### Jina Embeddings
|
||||
|
||||
LanceDB registers the JinaAI embeddings function in the registry as `jina`. You can pass any supported model name to the `create`. By default it uses `"jina-clip-v1"`.
|
||||
`jina-clip-v1` can handle both text and images and other models only support `text`.
|
||||
|
||||
|
||||
@@ -32,25 +32,51 @@ LanceDB OSS supports object stores such as AWS S3 (and compatible stores), Azure
|
||||
db = lancedb.connect("az://bucket/path")
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
AWS S3:
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
```
|
||||
|
||||
Google Cloud Storage:
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect("gs://bucket/path");
|
||||
```
|
||||
|
||||
Azure Blob Storage:
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect("az://bucket/path");
|
||||
```
|
||||
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
AWS S3:
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
```
|
||||
|
||||
Google Cloud Storage:
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("gs://bucket/path");
|
||||
```
|
||||
|
||||
Azure Blob Storage:
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("az://bucket/path");
|
||||
```
|
||||
@@ -78,12 +104,25 @@ If you only want this to apply to one particular connection, you can pass the `s
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
const db = await lancedb.connect("s3://bucket/path", {
|
||||
storageOptions: {timeout: "60s"}
|
||||
});
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("s3://bucket/path",
|
||||
{storageOptions: {timeout: "60s"}});
|
||||
const db = await lancedb.connect("s3://bucket/path", {
|
||||
storageOptions: {timeout: "60s"}
|
||||
});
|
||||
```
|
||||
|
||||
Getting even more specific, you can set the `timeout` for only a particular table:
|
||||
@@ -101,10 +140,25 @@ Getting even more specific, you can set the `timeout` for only a particular tabl
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
<!-- skip-test -->
|
||||
```javascript
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
const table = db.createTable(
|
||||
"table",
|
||||
[{ a: 1, b: 2}],
|
||||
{storageOptions: {timeout: "60s"}}
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
<!-- skip-test -->
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
const table = db.createTable(
|
||||
@@ -135,7 +189,6 @@ There are several options that can be set for all object stores, mostly related
|
||||
| `proxy_ca_certificate` | PEM-formatted CA certificate for proxy connections. |
|
||||
| `proxy_excludes` | List of hosts that bypass the proxy. This is a comma-separated list of domains and IP masks. Any subdomain of the provided domain will be bypassed. For example, `example.com, 192.168.1.0/24` would bypass `https://api.example.com`, `https://www.example.com`, and any IP in the range `192.168.1.0/24`. |
|
||||
|
||||
|
||||
### AWS S3
|
||||
|
||||
To configure credentials for AWS S3, you can use the `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` keys. Region can also be set, but it is not mandatory when using AWS.
|
||||
@@ -155,9 +208,27 @@ These can be set as environment variables or passed in the `storage_options` par
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect(
|
||||
"s3://bucket/path",
|
||||
{
|
||||
storageOptions: {
|
||||
awsAccessKeyId: "my-access-key",
|
||||
awsSecretAccessKey: "my-secret-key",
|
||||
awsSessionToken: "my-session-token",
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"s3://bucket/path",
|
||||
@@ -188,7 +259,6 @@ The following keys can be used as both environment variables or keys in the `sto
|
||||
| `aws_sse_kms_key_id` | The KMS key ID to use for server-side encryption. If set, `aws_server_side_encryption` must be `"aws:kms"` or `"aws:kms:dsse"`. |
|
||||
| `aws_sse_bucket_key_enabled` | Whether to use bucket keys for server-side encryption. |
|
||||
|
||||
|
||||
!!! tip "Automatic cleanup for failed writes"
|
||||
|
||||
LanceDB uses [multi-part uploads](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html) when writing data to S3 in order to maximize write speed. LanceDB will abort these uploads when it shuts down gracefully, such as when cancelled by keyboard interrupt. However, in the rare case that LanceDB crashes, it is possible that some data will be left lingering in your account. To clean up this data, we recommend (as AWS themselves do) that you set up a lifecycle rule to delete in-progress uploads after 7 days. See the AWS guide:
|
||||
@@ -384,9 +454,26 @@ LanceDB can also connect to S3-compatible stores, such as MinIO. To do so, you m
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect(
|
||||
"s3://bucket/path",
|
||||
{
|
||||
storageOptions: {
|
||||
region: "us-east-1",
|
||||
endpoint: "http://minio:9000",
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"s3://bucket/path",
|
||||
@@ -428,10 +515,12 @@ To configure LanceDB to use an S3 Express endpoint, you must set the storage opt
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect(
|
||||
"s3://my-bucket--use1-az4--x-s3/path",
|
||||
{
|
||||
@@ -443,6 +532,20 @@ To configure LanceDB to use an S3 Express endpoint, you must set the storage opt
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"s3://my-bucket--use1-az4--x-s3/path",
|
||||
{
|
||||
storageOptions: {
|
||||
region: "us-east-1",
|
||||
s3Express: "true",
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
### Google Cloud Storage
|
||||
|
||||
@@ -461,9 +564,25 @@ GCS credentials are configured by setting the `GOOGLE_SERVICE_ACCOUNT` environme
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect(
|
||||
"gs://my-bucket/my-database",
|
||||
{
|
||||
storageOptions: {
|
||||
serviceAccount: "path/to/service-account.json",
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"gs://my-bucket/my-database",
|
||||
@@ -475,12 +594,10 @@ GCS credentials are configured by setting the `GOOGLE_SERVICE_ACCOUNT` environme
|
||||
);
|
||||
```
|
||||
|
||||
|
||||
!!! info "HTTP/2 support"
|
||||
|
||||
By default, GCS uses HTTP/1 for communication, as opposed to HTTP/2. This improves maximum throughput significantly. However, if you wish to use HTTP/2 for some reason, you can set the environment variable `HTTP1_ONLY` to `false`.
|
||||
|
||||
|
||||
The following keys can be used as both environment variables or keys in the `storage_options` parameter:
|
||||
<!-- source: https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html -->
|
||||
|
||||
@@ -490,7 +607,6 @@ The following keys can be used as both environment variables or keys in the `sto
|
||||
| ``google_service_account_key`` | The serialized service account key. |
|
||||
| ``google_application_credentials`` | Path to the application credentials. |
|
||||
|
||||
|
||||
### Azure Blob Storage
|
||||
|
||||
Azure Blob Storage credentials can be configured by setting the `AZURE_STORAGE_ACCOUNT_NAME` and `AZURE_STORAGE_ACCOUNT_KEY` environment variables. Alternatively, you can pass the account name and key in the `storage_options` parameter:
|
||||
@@ -509,9 +625,26 @@ Azure Blob Storage credentials can be configured by setting the `AZURE_STORAGE_A
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect(
|
||||
"az://my-container/my-database",
|
||||
{
|
||||
storageOptions: {
|
||||
accountName: "some-account",
|
||||
accountKey: "some-key",
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"az://my-container/my-database",
|
||||
|
||||
@@ -8,26 +8,39 @@ This guide will show how to create tables, insert data into them, and update the
|
||||
|
||||
## Creating a LanceDB Table
|
||||
|
||||
Initialize a LanceDB connection and create a table
|
||||
|
||||
=== "Python"
|
||||
Initialize a LanceDB connection and create a table using one of the many methods listed below.
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
db = lancedb.connect("./.lancedb")
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
LanceDB allows ingesting data from various sources - `dict`, `list[dict]`, `pd.DataFrame`, `pa.Table` or a `Iterator[pa.RecordBatch]`. Let's take a look at some of these.
|
||||
|
||||
Initialize a VectorDB connection and create a table using one of the many methods listed below.
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import * as arrow from "apache-arrow";
|
||||
|
||||
const uri = "data/sample-lancedb";
|
||||
const db = await lancedb.connect(uri);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
const lancedb = require("vectordb");
|
||||
|
||||
const uri = "data/sample-lancedb";
|
||||
const db = await lancedb.connect(uri);
|
||||
```
|
||||
|
||||
LanceDB allows ingesting data from various sources - `dict`, `list[dict]`, `pd.DataFrame`, `pa.Table` or a `Iterator[pa.RecordBatch]`. Let's take a look at some of these.
|
||||
|
||||
|
||||
### From list of tuples or dictionaries
|
||||
|
||||
@@ -45,6 +58,7 @@ This guide will show how to create tables, insert data into them, and update the
|
||||
|
||||
db["my_table"].head()
|
||||
```
|
||||
|
||||
!!! info "Note"
|
||||
If the table already exists, LanceDB will raise an error by default.
|
||||
|
||||
@@ -63,24 +77,52 @@ This guide will show how to create tables, insert data into them, and update the
|
||||
db.create_table("name", data, mode="overwrite")
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
You can create a LanceDB table in JavaScript using an array of JSON records as follows.
|
||||
=== "Typescript[^1]"
|
||||
You can create a LanceDB table in JavaScript using an array of records as follows.
|
||||
|
||||
```javascript
|
||||
const tb = await db.createTable("my_table", [{
|
||||
"vector": [3.1, 4.1],
|
||||
"item": "foo",
|
||||
"price": 10.0
|
||||
}, {
|
||||
"vector": [5.9, 26.5],
|
||||
"item": "bar",
|
||||
"price": 20.0
|
||||
}]);
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table"
|
||||
```
|
||||
!!! info "Note"
|
||||
If the table already exists, LanceDB will raise an error by default. If you want to overwrite the table, you need to specify the `WriteMode` in the createTable function.
|
||||
|
||||
```javascript
|
||||
This will infer the schema from the provided data. If you want to explicitly provide a schema, you can use `apache-arrow` to declare a schema:
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table_with_schema"
|
||||
```
|
||||
|
||||
!!! info "Note"
|
||||
`createTable` supports an optional `existsOk` parameter. When set to true
|
||||
and the table exists, then it simply opens the existing table. The data you
|
||||
passed in will NOT be appended to the table in that case.
|
||||
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table_exists_ok"
|
||||
```
|
||||
|
||||
Sometimes you want to make sure that you start fresh. If you want to
|
||||
overwrite the table, you can pass in mode: "overwrite" to the createTable function.
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table_overwrite"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
--8<-- "docs/src/basic_legacy.ts:create_table"
|
||||
```
|
||||
|
||||
!!! warning
|
||||
`existsOk` option is not supported in `vectordb`
|
||||
|
||||
Sometimes you want to make sure that you start fresh. If you want to
|
||||
overwrite the table, you can pass in `writeMode: WriteMode.Overwrite` to the createTable function.
|
||||
|
||||
```ts
|
||||
const table = await con.createTable(tableName, data, { writeMode: WriteMode.Overwrite })
|
||||
```
|
||||
|
||||
@@ -99,6 +141,7 @@ This guide will show how to create tables, insert data into them, and update the
|
||||
|
||||
db["my_table"].head()
|
||||
```
|
||||
|
||||
!!! info "Note"
|
||||
Data is converted to Arrow before being written to disk. For maximum control over how data is saved, either provide the PyArrow schema to convert to or else provide a PyArrow Table directly.
|
||||
|
||||
@@ -133,10 +176,11 @@ table = db.create_table("pl_table", data=data)
|
||||
```
|
||||
|
||||
### From an Arrow Table
|
||||
=== "Python"
|
||||
You can also create LanceDB tables directly from Arrow tables.
|
||||
LanceDB supports float16 data type!
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
import pyarrow as pa
|
||||
import numpy as np
|
||||
@@ -160,11 +204,17 @@ table = db.create_table("pl_table", data=data)
|
||||
tbl = db.create_table("f16_tbl", data, schema=schema)
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
You can also create LanceDB tables directly from Arrow tables.
|
||||
LanceDB supports Float16 data type!
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_f16_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_f16_table"
|
||||
```
|
||||
|
||||
@@ -329,23 +379,24 @@ You can also use iterators of other types like Pandas DataFrame or Pylists direc
|
||||
tbl = db.open_table("my_table")
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
If you forget the name of your table, you can always get a listing of all table names.
|
||||
|
||||
```javascript
|
||||
```typescript
|
||||
console.log(await db.tableNames());
|
||||
```
|
||||
|
||||
Then, you can open any existing tables.
|
||||
|
||||
```javascript
|
||||
```typescript
|
||||
const tbl = await db.openTable("my_table");
|
||||
```
|
||||
|
||||
## Creating empty table
|
||||
You can create an empty table for scenarios where you want to add data to the table later. An example would be when you want to collect data from a stream/external file and then add it to a table in batches.
|
||||
|
||||
=== "Python"
|
||||
In Python, you can create an empty table for scenarios where you want to add data to the table later. An example would be when you want to collect data from a stream/external file and then add it to a table in batches.
|
||||
|
||||
```python
|
||||
|
||||
@@ -382,9 +433,23 @@ You can also use iterators of other types like Pandas DataFrame or Pylists direc
|
||||
|
||||
Once the empty table has been created, you can add data to it via the various methods listed in the [Adding to a table](#adding-to-a-table) section.
|
||||
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_empty_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_empty_table"
|
||||
```
|
||||
|
||||
## Adding to a table
|
||||
|
||||
After a table has been created, you can always add more data to it using the various methods available.
|
||||
After a table has been created, you can always add more data to it using the `add` method.
|
||||
|
||||
=== "Python"
|
||||
You can add any of the valid data structures accepted by LanceDB table, i.e, `dict`, `list[dict]`, `pd.DataFrame`, or `Iterator[pa.RecordBatch]`. Below are some examples.
|
||||
@@ -472,9 +537,7 @@ After a table has been created, you can always add more data to it using the var
|
||||
tbl.add(models)
|
||||
```
|
||||
|
||||
|
||||
|
||||
=== "JavaScript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```javascript
|
||||
await tbl.add(
|
||||
@@ -530,15 +593,15 @@ Use the `delete()` method on tables to delete rows from a table. To choose which
|
||||
# 0 3 [5.0, 6.0]
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
await tbl.delete('item = "fizz"')
|
||||
```
|
||||
|
||||
### Deleting row with specific column value
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const con = await lancedb.connect("./.lancedb")
|
||||
const data = [
|
||||
{id: 1, vector: [1, 2]},
|
||||
@@ -552,7 +615,7 @@ Use the `delete()` method on tables to delete rows from a table. To choose which
|
||||
|
||||
### Delete from a list of values
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const to_remove = [1, 5];
|
||||
await tbl.delete(`id IN (${to_remove.join(",")})`)
|
||||
await tbl.countRows() // Returns 1
|
||||
@@ -609,11 +672,32 @@ This can be used to update zero to all rows depending on how many rows match the
|
||||
2 2 [10.0, 10.0]
|
||||
```
|
||||
|
||||
=== "JavaScript/Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
API Reference: [lancedb.Table.update](../js/classes/Table.md/#update)
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
const db = await lancedb.connect("./.lancedb");
|
||||
|
||||
const data = [
|
||||
{x: 1, vector: [1, 2]},
|
||||
{x: 2, vector: [3, 4]},
|
||||
{x: 3, vector: [5, 6]},
|
||||
];
|
||||
const tbl = await db.createTable("my_table", data)
|
||||
|
||||
await tbl.update({vector: [10, 10]}, { where: "x = 2"})
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
API Reference: [vectordb.Table.update](../javascript/interfaces/Table.md/#update)
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const lancedb = require("vectordb");
|
||||
|
||||
const db = await lancedb.connect("./.lancedb");
|
||||
@@ -628,6 +712,8 @@ This can be used to update zero to all rows depending on how many rows match the
|
||||
await tbl.update({ where: "x = 2", values: {vector: [10, 10]} })
|
||||
```
|
||||
|
||||
#### Updating using a sql query
|
||||
|
||||
The `values` parameter is used to provide the new values for the columns as literal values. You can also use the `values_sql` / `valuesSql` parameter to provide SQL expressions for the new values. For example, you can use `values_sql="x + 1"` to increment the value of the `x` column by 1.
|
||||
|
||||
=== "Python"
|
||||
@@ -647,9 +733,15 @@ The `values` parameter is used to provide the new values for the columns as lite
|
||||
2 3 [10.0, 10.0]
|
||||
```
|
||||
|
||||
=== "JavaScript/Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
Coming Soon!
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
await tbl.update({ valuesSql: { x: "x + 1" } })
|
||||
```
|
||||
|
||||
@@ -672,7 +764,7 @@ Use the `drop_table()` method on the database to remove a table.
|
||||
By default, if the table does not exist an exception is raised. To suppress this,
|
||||
you can pass in `ignore_missing=True`.
|
||||
|
||||
=== "Javascript/Typescript"
|
||||
=== "TypeScript"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:drop_table"
|
||||
@@ -726,18 +818,18 @@ There are three possible settings for `read_consistency_interval`:
|
||||
table.checkout_latest()
|
||||
```
|
||||
|
||||
=== "JavaScript/Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
To set strong consistency, use `0`:
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const db = await lancedb.connect({ uri: "./.lancedb", readConsistencyInterval: 0 });
|
||||
const table = await db.openTable("my_table");
|
||||
```
|
||||
|
||||
For eventual consistency, specify the update interval as seconds:
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const db = await lancedb.connect({ uri: "./.lancedb", readConsistencyInterval: 5 });
|
||||
const table = await db.openTable("my_table");
|
||||
```
|
||||
@@ -749,3 +841,5 @@ There are three possible settings for `read_consistency_interval`:
|
||||
## What's next?
|
||||
|
||||
Learn the best practices on creating an ANN index and getting the most out of it.
|
||||
|
||||
[^1]: The `vectordb` package is a legacy package that is deprecated in favor of `@lancedb/lancedb`. The `vectordb` package will continue to receive bug fixes and security updates until September 2024. We recommend all new projects use `@lancedb/lancedb`. See the [migration guide](migration.md) for more information.
|
||||
|
||||
@@ -53,9 +53,20 @@ db.create_table("my_vectors", data=data)
|
||||
.to_list()
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/search.ts:import"
|
||||
|
||||
--8<-- "nodejs/examples/search.ts:search1"
|
||||
```
|
||||
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
--8<-- "docs/src/search_legacy.ts:import"
|
||||
|
||||
--8<-- "docs/src/search_legacy.ts:search1"
|
||||
@@ -73,7 +84,15 @@ By default, `l2` will be used as metric type. You can specify the metric type as
|
||||
.to_list()
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/search.ts:search2"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```javascript
|
||||
--8<-- "docs/src/search_legacy.ts:search2"
|
||||
|
||||
@@ -44,9 +44,17 @@ const tbl = await db.createTable('myVectors', data)
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/filtering.ts:search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
--8<-- "docs/src/sql_legacy.ts:search"
|
||||
```
|
||||
|
||||
@@ -78,9 +86,17 @@ For example, the following filter string is acceptable:
|
||||
.to_arrow()
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/filtering.ts:vec_search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
--8<-- "docs/src/sql_legacy.ts:vec_search"
|
||||
```
|
||||
|
||||
@@ -148,9 +164,17 @@ You can also filter your data without search.
|
||||
tbl.search().where("id = 10").limit(10).to_arrow()
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/filtering.ts:sql_search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
--8<---- "docs/src/sql_legacy.ts:sql_search"
|
||||
```
|
||||
|
||||
|
||||
@@ -6,5 +6,5 @@
|
||||
"target": "es2022",
|
||||
"types": ["jest", "node"]
|
||||
},
|
||||
"include": ["**/*"]
|
||||
"include": ["**/*", "../examples/ann_indexes.ts"]
|
||||
}
|
||||
|
||||
@@ -94,7 +94,13 @@
|
||||
"useValidTypeof": "error"
|
||||
}
|
||||
},
|
||||
"ignore": ["**/dist/**/*", "**/native.js", "**/native.d.ts"]
|
||||
"ignore": [
|
||||
"**/dist/**/*",
|
||||
"**/native.js",
|
||||
"**/native.d.ts",
|
||||
"__test__/docs/**/*",
|
||||
"examples/**/*"
|
||||
]
|
||||
},
|
||||
"javascript": {
|
||||
"globals": []
|
||||
|
||||
1
nodejs/examples/.gitignore
vendored
Normal file
1
nodejs/examples/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
data/
|
||||
49
nodejs/examples/ann_indexes.ts
Normal file
49
nodejs/examples/ann_indexes.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
// --8<-- [start:import]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
// --8<-- [end:import]
|
||||
|
||||
// --8<-- [start:ingest]
|
||||
const db = await lancedb.connect("/tmp/lancedb/");
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: `${i}`,
|
||||
content: "",
|
||||
longId: `${i}`,
|
||||
}));
|
||||
|
||||
const table = await db.createTable("my_vectors", data, { mode: "overwrite" });
|
||||
await table.createIndex("vector", {
|
||||
config: lancedb.Index.ivfPq({
|
||||
numPartitions: 16,
|
||||
numSubVectors: 48,
|
||||
}),
|
||||
});
|
||||
// --8<-- [end:ingest]
|
||||
|
||||
// --8<-- [start:search1]
|
||||
const _results1 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.limit(2)
|
||||
.nprobes(20)
|
||||
.refineFactor(10)
|
||||
.toArray();
|
||||
// --8<-- [end:search1]
|
||||
|
||||
// --8<-- [start:search2]
|
||||
const _results2 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.where("id != '1141'")
|
||||
.limit(2)
|
||||
.toArray();
|
||||
// --8<-- [end:search2]
|
||||
|
||||
// --8<-- [start:search3]
|
||||
const _results3 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.select(["id"])
|
||||
.limit(2)
|
||||
.toArray();
|
||||
// --8<-- [end:search3]
|
||||
|
||||
console.log("Ann indexes: done");
|
||||
149
nodejs/examples/basic.ts
Normal file
149
nodejs/examples/basic.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
// --8<-- [start:imports]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import * as arrow from "apache-arrow";
|
||||
import { Field, FixedSizeList, Float16, Int32, Schema } from "apache-arrow";
|
||||
|
||||
// --8<-- [end:imports]
|
||||
|
||||
// --8<-- [start:connect]
|
||||
const uri = "/tmp/lancedb/";
|
||||
const db = await lancedb.connect(uri);
|
||||
// --8<-- [end:connect]
|
||||
{
|
||||
// --8<-- [start:create_table]
|
||||
const data = [
|
||||
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
||||
];
|
||||
const _tbl = await db.createTable("myTable", data);
|
||||
// --8<-- [end:create_table]
|
||||
{
|
||||
// --8<-- [start:create_table_exists_ok]
|
||||
const _tbl = await db.createTable("myTable", data, {
|
||||
existsOk: true,
|
||||
});
|
||||
// --8<-- [end:create_table_exists_ok]
|
||||
}
|
||||
{
|
||||
// --8<-- [start:create_table_overwrite]
|
||||
const _tbl = await db.createTable("myTable", data, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
// --8<-- [end:create_table_overwrite]
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:create_table_with_schema]
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field(
|
||||
"vector",
|
||||
new arrow.FixedSizeList(
|
||||
2,
|
||||
new arrow.Field("item", new arrow.Float32(), true),
|
||||
),
|
||||
),
|
||||
new arrow.Field("item", new arrow.Utf8(), true),
|
||||
new arrow.Field("price", new arrow.Float32(), true),
|
||||
]);
|
||||
const data = [
|
||||
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
||||
];
|
||||
const _tbl = await db.createTable("myTable", data, {
|
||||
schema,
|
||||
});
|
||||
// --8<-- [end:create_table_with_schema]
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:create_empty_table]
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field(
|
||||
"vector",
|
||||
new arrow.FixedSizeList(
|
||||
2,
|
||||
new arrow.Field("item", new arrow.Float32(), true),
|
||||
),
|
||||
),
|
||||
]);
|
||||
const _tbl = await db.createEmptyTable("empty_table", schema);
|
||||
// --8<-- [end:create_empty_table]
|
||||
}
|
||||
{
|
||||
// --8<-- [start:open_table]
|
||||
const _tbl = await db.openTable("myTable");
|
||||
// --8<-- [end:open_table]
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:table_names]
|
||||
const tableNames = await db.tableNames();
|
||||
console.log(tableNames);
|
||||
// --8<-- [end:table_names]
|
||||
}
|
||||
|
||||
const tbl = await db.openTable("myTable");
|
||||
{
|
||||
// --8<-- [start:add_data]
|
||||
const data = [
|
||||
{ vector: [1.3, 1.4], item: "fizz", price: 100.0 },
|
||||
{ vector: [9.5, 56.2], item: "buzz", price: 200.0 },
|
||||
];
|
||||
await tbl.add(data);
|
||||
// --8<-- [end:add_data]
|
||||
}
|
||||
{
|
||||
// --8<-- [start:vector_search]
|
||||
const _res = await tbl.search([100, 100]).limit(2).toArray(); // await: toArray() is async
|
||||
// --8<-- [end:vector_search]
|
||||
}
|
||||
{
|
||||
const data = Array.from({ length: 1000 })
|
||||
.fill(null)
|
||||
.map(() => ({
|
||||
vector: [Math.random(), Math.random()],
|
||||
item: "autogen",
|
||||
price: Math.round(Math.random() * 100),
|
||||
}));
|
||||
|
||||
await tbl.add(data);
|
||||
}
|
||||
|
||||
// --8<-- [start:create_index]
|
||||
await tbl.createIndex("vector");
|
||||
// --8<-- [end:create_index]
|
||||
|
||||
// --8<-- [start:delete_rows]
|
||||
await tbl.delete('item = "fizz"');
|
||||
// --8<-- [end:delete_rows]
|
||||
|
||||
// --8<-- [start:drop_table]
|
||||
await db.dropTable("myTable");
|
||||
// --8<-- [end:drop_table]
|
||||
await db.dropTable("empty_table");
|
||||
|
||||
{
|
||||
// --8<-- [start:create_f16_table]
|
||||
const db = await lancedb.connect("/tmp/lancedb");
|
||||
const dim = 16;
|
||||
const total = 10;
|
||||
const f16Schema = new Schema([
|
||||
new Field("id", new Int32()),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(dim, new Field("item", new Float16(), true)),
|
||||
false,
|
||||
),
|
||||
]);
|
||||
const data = lancedb.makeArrowTable(
|
||||
Array.from(Array(total), (_, i) => ({
|
||||
id: i,
|
||||
vector: Array.from(Array(dim), Math.random),
|
||||
})),
|
||||
{ schema: f16Schema },
|
||||
);
|
||||
const _table = await db.createTable("f16_tbl", data);
|
||||
// --8<-- [end:create_f16_table]
|
||||
await db.dropTable("f16_tbl");
|
||||
}
|
||||
83
nodejs/examples/embedding.ts
Normal file
83
nodejs/examples/embedding.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
// --8<-- [start:imports]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
|
||||
import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
|
||||
import { type Float, Float32, Utf8 } from "apache-arrow";
|
||||
// --8<-- [end:imports]
|
||||
|
||||
{
|
||||
// --8<-- [start:openai_embeddings]
|
||||
|
||||
const db = await lancedb.connect("/tmp/db");
|
||||
const func = getRegistry()
|
||||
.get("openai")
|
||||
?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;
|
||||
|
||||
const wordsSchema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
const tbl = await db.createEmptyTable("words", wordsSchema, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
|
||||
|
||||
const query = "greetings";
|
||||
const actual = (await (await tbl.search(query)).limit(1).toArray())[0];
|
||||
|
||||
// --8<-- [end:openai_embeddings]
|
||||
console.log("result = ", actual.text);
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:embedding_function]
|
||||
const db = await lancedb.connect("/tmp/db");
|
||||
|
||||
@register("my_embedding")
|
||||
class MyEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
embeddingDataType(): Float {
|
||||
return new Float32();
|
||||
}
|
||||
async computeQueryEmbeddings(_data: string) {
|
||||
// This is a placeholder for a real embedding function
|
||||
return [1, 2, 3];
|
||||
}
|
||||
async computeSourceEmbeddings(data: string[]) {
|
||||
// This is a placeholder for a real embedding function
|
||||
return data.map(() => [1, 2, 3]); // one fresh vector per input row (no shared reference)
|
||||
}
|
||||
}
|
||||
|
||||
const func = new MyEmbeddingFunction();
|
||||
|
||||
const data = [{ text: "pepperoni" }, { text: "pineapple" }];
|
||||
|
||||
// Option 1: manually specify the embedding function
|
||||
const table = await db.createTable("vectors", data, {
|
||||
embeddingFunction: {
|
||||
function: func,
|
||||
sourceColumn: "text",
|
||||
vectorColumn: "vector",
|
||||
},
|
||||
mode: "overwrite",
|
||||
});
|
||||
|
||||
// Option 2: provide the embedding function through a schema
|
||||
|
||||
const schema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
|
||||
const table2 = await db.createTable("vectors2", data, {
|
||||
schema,
|
||||
mode: "overwrite",
|
||||
});
|
||||
// --8<-- [end:embedding_function]
|
||||
}
|
||||
34
nodejs/examples/filtering.ts
Normal file
34
nodejs/examples/filtering.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: i,
|
||||
item: `item ${i}`,
|
||||
strId: `${i}`,
|
||||
}));
|
||||
|
||||
const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
|
||||
|
||||
// --8<-- [start:search]
|
||||
const _result = await tbl
|
||||
.search(Array(1536).fill(0.5))
|
||||
.limit(1)
|
||||
.where("id = 10")
|
||||
.toArray();
|
||||
// --8<-- [end:search]
|
||||
|
||||
// --8<-- [start:vec_search]
|
||||
await tbl
|
||||
.search(Array(1536).fill(0))
|
||||
.where("(item IN ('item 0', 'item 2')) AND (id > 10)")
|
||||
.postfilter()
|
||||
.toArray();
|
||||
// --8<-- [end:vec_search]
|
||||
|
||||
// --8<-- [start:sql_search]
|
||||
await tbl.query().where("id = 10").limit(10).toArray();
|
||||
// --8<-- [end:sql_search]
|
||||
|
||||
console.log("SQL search: done");
|
||||
27
nodejs/examples/jsconfig.json
Normal file
27
nodejs/examples/jsconfig.json
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
// Enable latest features
|
||||
"lib": ["ESNext", "DOM"],
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleDetection": "force",
|
||||
"jsx": "react-jsx",
|
||||
"allowJs": true,
|
||||
|
||||
// Bundler mode
|
||||
"moduleResolution": "bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
"noEmit": true,
|
||||
|
||||
// Best practices
|
||||
"strict": true,
|
||||
"skipLibCheck": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
|
||||
// Some stricter flags (disabled by default)
|
||||
"noUnusedLocals": false,
|
||||
"noUnusedParameters": false,
|
||||
"noPropertyAccessFromIndexSignature": false
|
||||
}
|
||||
}
|
||||
79
nodejs/examples/package-lock.json
generated
Normal file
79
nodejs/examples/package-lock.json
generated
Normal file
@@ -0,0 +1,79 @@
|
||||
{
|
||||
"name": "examples",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "examples",
|
||||
"version": "1.0.0",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@lancedb/lancedb": "file:../"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5.0.0"
|
||||
}
|
||||
},
|
||||
"..": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.6.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"os": [
|
||||
"darwin",
|
||||
"linux",
|
||||
"win32"
|
||||
],
|
||||
"dependencies": {
|
||||
"apache-arrow": "^15.0.0",
|
||||
"axios": "^1.7.2",
|
||||
"openai": "^4.29.2",
|
||||
"reflect-metadata": "^0.2.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@aws-sdk/client-kms": "^3.33.0",
|
||||
"@aws-sdk/client-s3": "^3.33.0",
|
||||
"@biomejs/biome": "^1.7.3",
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@napi-rs/cli": "^2.18.0",
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/jest": "^29.1.2",
|
||||
"@types/tmp": "^0.2.6",
|
||||
"apache-arrow-old": "npm:apache-arrow@13.0.0",
|
||||
"eslint": "^8.57.0",
|
||||
"jest": "^29.7.0",
|
||||
"shx": "^0.3.4",
|
||||
"tmp": "^0.2.3",
|
||||
"ts-jest": "^29.1.2",
|
||||
"typedoc": "^0.25.7",
|
||||
"typedoc-plugin-markdown": "^3.17.1",
|
||||
"typescript": "^5.3.3",
|
||||
"typescript-eslint": "^7.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/lancedb": {
|
||||
"resolved": "..",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.5.2",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.2.tgz",
|
||||
"integrity": "sha512-NcRtPEOsPFFWjobJEtfihkLCZCXZt/os3zf8nTxjVH3RvTSxjrCamJpbExGvYOF+tFHc3pA65qpdwPbzjohhew==",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
18
nodejs/examples/package.json
Normal file
18
nodejs/examples/package.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "examples",
|
||||
"version": "1.0.0",
|
||||
"description": "Examples for LanceDB",
|
||||
"main": "index.js",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "Lance Devs",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@lancedb/lancedb": "file:../"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5.0.0"
|
||||
}
|
||||
}
|
||||
37
nodejs/examples/search.ts
Normal file
37
nodejs/examples/search.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import * as fs from "node:fs";
|
||||
// --8<-- [start:import]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
// --8<-- [end:import]
|
||||
|
||||
async function setup() {
|
||||
fs.rmSync("data/sample-lancedb", { recursive: true, force: true });
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: `${i}`,
|
||||
content: "",
|
||||
longId: `${i}`,
|
||||
}));
|
||||
|
||||
await db.createTable("my_vectors", data);
|
||||
}
|
||||
|
||||
await setup();
|
||||
|
||||
// --8<-- [start:search1]
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
const tbl = await db.openTable("my_vectors");
|
||||
|
||||
const _results1 = await tbl.search(Array(1536).fill(1.2)).limit(10).toArray();
|
||||
// --8<-- [end:search1]
|
||||
|
||||
// --8<-- [start:search2]
|
||||
const _results2 = await tbl
|
||||
.search(Array(1536).fill(1.2))
|
||||
.distanceType("cosine")
|
||||
.limit(10)
|
||||
.toArray();
|
||||
// --8<-- [end:search2]
|
||||
|
||||
console.log("search: done");
|
||||
208
nodejs/native.d.ts
vendored
Normal file
208
nodejs/native.d.ts
vendored
Normal file
@@ -0,0 +1,208 @@
|
||||
/* tslint:disable */
|
||||
/* eslint-disable */
|
||||
|
||||
/* auto-generated by NAPI-RS */
|
||||
|
||||
/** A description of an index currently configured on a column */
|
||||
export interface IndexConfig {
|
||||
/** The name of the index */
|
||||
name: string
|
||||
/** The type of the index */
|
||||
indexType: string
|
||||
/**
|
||||
* The columns in the index
|
||||
*
|
||||
* Currently this is always an array of size 1. In the future there may
|
||||
* be more columns to represent composite indices.
|
||||
*/
|
||||
columns: Array<string>
|
||||
}
|
||||
/** Statistics about a compaction operation. */
|
||||
export interface CompactionStats {
|
||||
/** The number of fragments removed */
|
||||
fragmentsRemoved: number
|
||||
/** The number of new, compacted fragments added */
|
||||
fragmentsAdded: number
|
||||
/** The number of data files removed */
|
||||
filesRemoved: number
|
||||
/** The number of new, compacted data files added */
|
||||
filesAdded: number
|
||||
}
|
||||
/** Statistics about a cleanup operation */
|
||||
export interface RemovalStats {
|
||||
/** The number of bytes removed */
|
||||
bytesRemoved: number
|
||||
/** The number of old versions removed */
|
||||
oldVersionsRemoved: number
|
||||
}
|
||||
/** Statistics about an optimize operation */
|
||||
export interface OptimizeStats {
|
||||
/** Statistics about the compaction operation */
|
||||
compaction: CompactionStats
|
||||
/** Statistics about the removal operation */
|
||||
prune: RemovalStats
|
||||
}
|
||||
/**
|
||||
* A definition of a column alteration. The alteration changes the column at
|
||||
* `path` to have the new name `name`, to be nullable if `nullable` is true,
|
||||
* and to have the data type `data_type`. At least one of `rename` or `nullable`
|
||||
* must be provided.
|
||||
*/
|
||||
export interface ColumnAlteration {
|
||||
/**
|
||||
* The path to the column to alter. This is a dot-separated path to the column.
|
||||
* If it is a top-level column then it is just the name of the column. If it is
|
||||
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
||||
* `c` nested inside a column `b` nested inside a column `a`.
|
||||
*/
|
||||
path: string
|
||||
/**
|
||||
* The new name of the column. If not provided then the name will not be changed.
|
||||
* This must be distinct from the names of all other columns in the table.
|
||||
*/
|
||||
rename?: string
|
||||
/** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
|
||||
nullable?: boolean
|
||||
}
|
||||
/** A definition of a new column to add to a table. */
|
||||
export interface AddColumnsSql {
|
||||
/** The name of the new column. */
|
||||
name: string
|
||||
/**
|
||||
* The values to populate the new column with, as a SQL expression.
|
||||
* The expression can reference other columns in the table.
|
||||
*/
|
||||
valueSql: string
|
||||
}
|
||||
export interface IndexStatistics {
|
||||
/** The number of rows indexed by the index */
|
||||
numIndexedRows: number
|
||||
/** The number of rows not indexed */
|
||||
numUnindexedRows: number
|
||||
/** The type of the index */
|
||||
indexType?: string
|
||||
/** The metadata for each index */
|
||||
indices: Array<IndexMetadata>
|
||||
}
|
||||
export interface IndexMetadata {
|
||||
metricType?: string
|
||||
indexType?: string
|
||||
}
|
||||
/** Options passed to `Connection.new`. Every field is optional. */
export interface ConnectionOptions {
|
||||
/**
|
||||
* (For LanceDB OSS only): The interval, in seconds, at which to check for
|
||||
* updates to the table from other processes. If not provided, then consistency is not
|
||||
* checked. For performance reasons, this is the default. For strong
|
||||
* consistency, set this to zero seconds. Then every read will check for
|
||||
* updates from other processes. As a compromise, you can set this to a
|
||||
* non-zero value for eventual consistency. If more than that interval
|
||||
* has passed since the last check, then the table will be checked for updates.
|
||||
* Note: this consistency only applies to read operations. Write operations are
|
||||
* always consistent.
|
||||
*/
|
||||
readConsistencyInterval?: number
|
||||
/**
|
||||
* (For LanceDB OSS only): configuration for object storage, given as a
|
||||
* map from string keys to string values.
|
||||
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>
|
||||
}
|
||||
/** Write mode for writing a table. Each member's runtime value is its own name as a string. */
|
||||
export const enum WriteMode {
|
||||
Create = 'Create',
|
||||
Append = 'Append',
|
||||
Overwrite = 'Overwrite'
|
||||
}
|
||||
/** Write options when creating a Table. */
|
||||
export interface WriteOptions {
|
||||
/** Write mode for writing to a table (optional); one of the `WriteMode` members. */
|
||||
mode?: WriteMode
|
||||
}
|
||||
/** Options object whose only field is an optional `storageOptions` map. */
export interface OpenTableOptions {
|
||||
/**
 * Optional map from string keys to string values.
 * NOTE(review): presumably the same object-storage configuration as
 * `ConnectionOptions.storageOptions` - confirm.
 */
storageOptions?: Record<string, string>
|
||||
}
|
||||
/**
 * Native connection handle. `display`, `isOpen` and `close` return
 * synchronously; every other method returns a Promise.
 */
export class Connection {
|
||||
/** Create a new Connection instance from the given URI. */
|
||||
static new(uri: string, options: ConnectionOptions): Promise<Connection>
|
||||
display(): string
|
||||
isOpen(): boolean
|
||||
close(): void
|
||||
/** List all tables in the dataset. Resolves to an array of table names; both arguments are optional. */
|
||||
tableNames(startAfter?: string | undefined | null, limit?: number | undefined | null): Promise<Array<string>>
|
||||
/**
|
||||
* Create table from an Apache Arrow IPC (file) buffer.
|
||||
*
|
||||
* Parameters:
|
||||
* - name: The name of the table.
|
||||
* - buf: The buffer containing the IPC file.
* - mode: The write mode, as a string. NOTE(review): presumably one of the
*   `WriteMode` names - confirm.
* - storageOptions: Optional map from string keys to string values.
* - useLegacyFormat: Optional boolean.
|
||||
*
|
||||
*/
|
||||
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
||||
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
||||
openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
|
||||
/** Drop the table with the given name, or raise an error if the table does not exist. */
|
||||
dropTable(name: string): Promise<void>
|
||||
}
|
||||
/** Native index description, obtained from one of the static factories below. */
export class Index {
|
||||
/**
 * Build an `Index` from IVF-PQ settings. Every argument is optional.
 * NOTE(review): presumably omitted arguments fall back to defaults chosen
 * by the native side - confirm.
 */
static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
|
||||
/** Build a btree `Index`; takes no arguments. */
static btree(): Index
|
||||
}
|
||||
/** TypeScript-style Async Iterator over RecordBatches */
|
||||
export class RecordBatchIterator {
|
||||
/**
 * Resolves to a `Buffer` or to `null`.
 * NOTE(review): presumably `null` marks the end of iteration and the buffer
 * holds one serialized record batch - confirm.
 */
next(): Promise<Buffer | null>
|
||||
}
|
||||
/**
 * A builder used to create and run a merge insert operation.
 * The three `when...` methods each return a `NativeMergeInsertBuilder`;
 * `execute` takes a `Buffer` and returns a Promise that resolves with no value.
 */
|
||||
export class NativeMergeInsertBuilder {
|
||||
whenMatchedUpdateAll(condition?: string | undefined | null): NativeMergeInsertBuilder
|
||||
whenNotMatchedInsertAll(): NativeMergeInsertBuilder
|
||||
whenNotMatchedBySourceDelete(filter?: string | undefined | null): NativeMergeInsertBuilder
|
||||
execute(buf: Buffer): Promise<void>
|
||||
}
|
||||
/**
 * Native query handle. `onlyIf`, `select` and `limit` return nothing;
 * `nearestTo` returns a `VectorQuery`; `execute` and `explainPlan` return Promises.
 */
export class Query {
|
||||
onlyIf(predicate: string): void
|
||||
/** Takes an array of string pairs. NOTE(review): the meaning of each pair element is not visible here - confirm. */
select(columns: Array<[string, string]>): void
|
||||
limit(limit: number): void
|
||||
nearestTo(vector: Float32Array): VectorQuery
|
||||
/** Resolves to a `RecordBatchIterator`; `maxBatchLength` is optional. */
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
||||
/** Resolves to a string. */
explainPlan(verbose: boolean): Promise<string>
|
||||
}
|
||||
/**
 * Native vector query handle. Every method returns nothing except `execute`
 * and `explainPlan`, which return Promises. It repeats the `onlyIf`,
 * `select`, `limit`, `execute` and `explainPlan` signatures of `Query`.
 */
export class VectorQuery {
|
||||
column(column: string): void
|
||||
distanceType(distanceType: string): void
|
||||
postfilter(): void
|
||||
refineFactor(refineFactor: number): void
|
||||
nprobes(nprobe: number): void
|
||||
bypassVectorIndex(): void
|
||||
onlyIf(predicate: string): void
|
||||
/** Takes an array of string pairs. NOTE(review): the meaning of each pair element is not visible here - confirm. */
select(columns: Array<[string, string]>): void
|
||||
limit(limit: number): void
|
||||
/** Resolves to a `RecordBatchIterator`; `maxBatchLength` is optional. */
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
||||
/** Resolves to a string. */
explainPlan(verbose: boolean): Promise<string>
|
||||
}
|
||||
/**
 * Native table handle. `display`, `isOpen`, `close`, `query`, `vectorSearch`
 * and `mergeInsert` return synchronously; every other method returns a Promise.
 */
export class Table {
|
||||
name: string
|
||||
display(): string
|
||||
isOpen(): boolean
|
||||
close(): void
|
||||
/** Return Schema as empty Arrow IPC file. */
|
||||
schema(): Promise<Buffer>
|
||||
/** NOTE(review): `buf` is presumably an Arrow IPC buffer as in `Connection.createTable` - confirm. */
add(buf: Buffer, mode: string): Promise<void>
|
||||
/** Resolves to a number; `filter` is optional. */
countRows(filter?: string | undefined | null): Promise<number>
|
||||
delete(predicate: string): Promise<void>
|
||||
/** `index` may be null or undefined; `column` is required; `replace` is optional. */
createIndex(index: Index | undefined | null, column: string, replace?: boolean | undefined | null): Promise<void>
|
||||
/** `onlyIf` may be null or undefined; `columns` is an array of string pairs. */
update(onlyIf: string | undefined | null, columns: Array<[string, string]>): Promise<void>
|
||||
query(): Query
|
||||
vectorSearch(vector: Float32Array): VectorQuery
|
||||
addColumns(transforms: Array<AddColumnsSql>): Promise<void>
|
||||
alterColumns(alterations: Array<ColumnAlteration>): Promise<void>
|
||||
dropColumns(columns: Array<string>): Promise<void>
|
||||
version(): Promise<number>
|
||||
checkout(version: number): Promise<void>
|
||||
checkoutLatest(): Promise<void>
|
||||
restore(): Promise<void>
|
||||
/** Resolves to `OptimizeStats`; `olderThanMs` is optional. */
optimize(olderThanMs?: number | undefined | null): Promise<OptimizeStats>
|
||||
listIndices(): Promise<Array<IndexConfig>>
|
||||
/** Resolves to `IndexStatistics` or to `null`. */
indexStats(indexName: string): Promise<IndexStatistics | null>
|
||||
mergeInsert(on: Array<string>): NativeMergeInsertBuilder
|
||||
}
|
||||
@@ -1,3 +1,5 @@
|
||||
// --8<-- [start:imports]
|
||||
|
||||
use std::{iter::once, sync::Arc};
|
||||
|
||||
use arrow_array::{Float64Array, Int32Array, RecordBatch, RecordBatchIterator, StringArray};
|
||||
@@ -11,6 +13,9 @@ use lancedb::{
|
||||
Result,
|
||||
};
|
||||
|
||||
// --8<-- [end:imports]
|
||||
|
||||
// --8<-- [start:openai_embeddings]
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let tempdir = tempfile::tempdir().unwrap();
|
||||
@@ -35,7 +40,6 @@ async fn main() -> Result<()> {
|
||||
.execute()
|
||||
.await?;
|
||||
|
||||
// there is no equivalent to '.search(<query>)' yet
|
||||
let query = Arc::new(StringArray::from_iter_values(once("something warm")));
|
||||
let query_vector = embedding.compute_query_embeddings(query)?;
|
||||
let mut results = table
|
||||
@@ -53,9 +57,9 @@ async fn main() -> Result<()> {
|
||||
.unwrap();
|
||||
let text = out.iter().next().unwrap().unwrap();
|
||||
println!("Closest match: {}", text);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
// --8<-- [end:openai_embeddings]
|
||||
|
||||
fn make_data() -> impl IntoArrow {
|
||||
let schema = Schema::new(vec![
|
||||
|
||||
Reference in New Issue
Block a user