mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-24 05:49:57 +00:00
Compare commits
19 Commits
lance-14.1
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2c36767f20 | ||
|
|
1fa7e96aa1 | ||
|
|
7ae327242b | ||
|
|
1f4a051070 | ||
|
|
92c93b08bf | ||
|
|
a363b02ca7 | ||
|
|
ff8eaab894 | ||
|
|
11959cc5d6 | ||
|
|
7c65cec8d7 | ||
|
|
82621d5b13 | ||
|
|
0708428357 | ||
|
|
137d86d3c5 | ||
|
|
bb2e624ff0 | ||
|
|
fdc949bafb | ||
|
|
31be9212da | ||
|
|
cef24801f4 | ||
|
|
b4436e0804 | ||
|
|
58c2cd01a5 | ||
|
|
a1a1891c0c |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.6.0"
|
||||
current_version = "0.7.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -4,6 +4,7 @@
|
||||
**/__pycache__
|
||||
.DS_Store
|
||||
venv
|
||||
.venv
|
||||
|
||||
.vscode
|
||||
.zed
|
||||
|
||||
@@ -18,4 +18,4 @@ repos:
|
||||
language: system
|
||||
types: [text]
|
||||
files: "nodejs/.*"
|
||||
exclude: nodejs/lancedb/native.d.ts|nodejs/dist/.*
|
||||
exclude: nodejs/lancedb/native.d.ts|nodejs/dist/.*|nodejs/examples/.*
|
||||
|
||||
17
Cargo.toml
17
Cargo.toml
@@ -20,18 +20,11 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
|
||||
categories = ["database-implementations"]
|
||||
|
||||
[workspace.dependencies]
|
||||
# lance = { "version" = "=0.14.0", "features" = ["dynamodb"] }
|
||||
# lance-index = { "version" = "=0.14.0" }
|
||||
# lance-linalg = { "version" = "=0.14.0" }
|
||||
# lance-testing = { "version" = "=0.14.0" }
|
||||
# lance-datafusion = { "version" = "=0.14.0" }
|
||||
|
||||
lance = { path = "../lance/rust/lance", "features" = ["dynamodb"] }
|
||||
lance-index = { path = "../lance/rust/lance-index" }
|
||||
lance-linalg = { path = "../lance/rust/lance-linalg" }
|
||||
lance-testing = { path = "../lance/rust/lance-testing" }
|
||||
lance-datafusion = { path = "../lance/rust/lance-datafusion" }
|
||||
|
||||
lance = { "version" = "=0.14.1", "features" = ["dynamodb"] }
|
||||
lance-index = { "version" = "=0.14.1" }
|
||||
lance-linalg = { "version" = "=0.14.1" }
|
||||
lance-testing = { "version" = "=0.14.1" }
|
||||
lance-datafusion = { "version" = "=0.14.1" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "51.0", optional = false }
|
||||
arrow-array = "51.0"
|
||||
|
||||
@@ -102,15 +102,18 @@ nav:
|
||||
- Linear Combination Reranker: reranking/linear_combination.md
|
||||
- Cross Encoder Reranker: reranking/cross_encoder.md
|
||||
- ColBERT Reranker: reranking/colbert.md
|
||||
- Jina Reranker: reranking/jina.md
|
||||
- OpenAI Reranker: reranking/openai.md
|
||||
- Building Custom Rerankers: reranking/custom_reranker.md
|
||||
- Example: notebooks/lancedb_reranking.ipynb
|
||||
- Filtering: sql.md
|
||||
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
|
||||
- Configuring Storage: guides/storage.md
|
||||
- Sync -> Async Migration Guide: migration.md
|
||||
- Migration Guide: migration.md
|
||||
- Tuning retrieval performance:
|
||||
- Choosing right query type: guides/tuning_retrievers/1_query_types.md
|
||||
- Reranking: guides/tuning_retrievers/2_reranking.md
|
||||
- Embedding fine-tuning: guides/tuning_retrievers/3_embed_tuning.md
|
||||
- 🧬 Managing embeddings:
|
||||
- Overview: embeddings/index.md
|
||||
- Embedding functions: embeddings/embedding_functions.md
|
||||
@@ -184,15 +187,18 @@ nav:
|
||||
- Linear Combination Reranker: reranking/linear_combination.md
|
||||
- Cross Encoder Reranker: reranking/cross_encoder.md
|
||||
- ColBERT Reranker: reranking/colbert.md
|
||||
- Jina Reranker: reranking/jina.md
|
||||
- OpenAI Reranker: reranking/openai.md
|
||||
- Building Custom Rerankers: reranking/custom_reranker.md
|
||||
- Example: notebooks/lancedb_reranking.ipynb
|
||||
- Filtering: sql.md
|
||||
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
|
||||
- Configuring Storage: guides/storage.md
|
||||
- Sync -> Async Migration Guide: migration.md
|
||||
- Migration Guide: migration.md
|
||||
- Tuning retrieval performance:
|
||||
- Choosing right query type: guides/tuning_retrievers/1_query_types.md
|
||||
- Reranking: guides/tuning_retrievers/2_reranking.md
|
||||
- Embedding fine-tuning: guides/tuning_retrievers/3_embed_tuning.md
|
||||
- Managing Embeddings:
|
||||
- Overview: embeddings/index.md
|
||||
- Embedding functions: embeddings/embedding_functions.md
|
||||
|
||||
@@ -187,6 +187,14 @@ paths:
|
||||
type: integer
|
||||
description: |
|
||||
The refine factor to use for search. Optional.
|
||||
default: null
|
||||
fast_search:
|
||||
type: boolean
|
||||
description: |
|
||||
Whether to use fast search. Optional.
|
||||
default: false
|
||||
required:
|
||||
- vector
|
||||
|
||||
responses:
|
||||
"200":
|
||||
|
||||
@@ -38,13 +38,27 @@ Lance supports `IVF_PQ` index type by default.
|
||||
tbl.create_index(num_partitions=256, num_sub_vectors=96)
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "TypeScript"
|
||||
|
||||
```typescript
|
||||
--8<--- "docs/src/ann_indexes.ts:import"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
--8<-- "docs/src/ann_indexes.ts:ingest"
|
||||
```
|
||||
Creating indexes is done via the [lancedb.Table.createIndex](../js/classes/Table.md/#createIndex) method.
|
||||
|
||||
```typescript
|
||||
--8<--- "nodejs/examples/ann_indexes.ts:import"
|
||||
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:ingest"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
Creating indexes is done via the [lancedb.Table.createIndex](../javascript/interfaces/Table.md/#createIndex) method.
|
||||
|
||||
```typescript
|
||||
--8<--- "docs/src/ann_indexes.ts:import"
|
||||
|
||||
--8<-- "docs/src/ann_indexes.ts:ingest"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -91,27 +105,27 @@ You can specify the GPU device to train IVF partitions via
|
||||
|
||||
=== "Linux"
|
||||
|
||||
<!-- skip-test -->
|
||||
``` { .python .copy }
|
||||
# Create index using CUDA on Nvidia GPUs.
|
||||
tbl.create_index(
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
accelerator="cuda"
|
||||
)
|
||||
```
|
||||
<!-- skip-test -->
|
||||
``` { .python .copy }
|
||||
# Create index using CUDA on Nvidia GPUs.
|
||||
tbl.create_index(
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
accelerator="cuda"
|
||||
)
|
||||
```
|
||||
|
||||
=== "MacOS"
|
||||
|
||||
<!-- skip-test -->
|
||||
```python
|
||||
# Create index using MPS on Apple Silicon.
|
||||
tbl.create_index(
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
accelerator="mps"
|
||||
)
|
||||
```
|
||||
<!-- skip-test -->
|
||||
```python
|
||||
# Create index using MPS on Apple Silicon.
|
||||
tbl.create_index(
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
accelerator="mps"
|
||||
)
|
||||
```
|
||||
|
||||
Troubleshooting:
|
||||
|
||||
@@ -150,11 +164,19 @@ There are a couple of parameters that can be used to fine-tune the search:
|
||||
1 [0.48587373, 0.269207, 0.15095535, 0.65531915,... item 3953 108.393867
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "TypeScript"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/ann_indexes.ts:search1"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:search1"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/ann_indexes.ts:search1"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -172,15 +194,23 @@ You can further filter the elements returned by a search using a where clause.
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
tbl.search(np.random.random((1536))).where("item != 'item 1141'").to_pandas()
|
||||
```
|
||||
```python
|
||||
tbl.search(np.random.random((1536))).where("item != 'item 1141'").to_pandas()
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
--8<-- "docs/src/ann_indexes.ts:search2"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:search2"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```javascript
|
||||
--8<-- "docs/src/ann_indexes.ts:search2"
|
||||
```
|
||||
|
||||
### Projections (select clause)
|
||||
|
||||
@@ -188,23 +218,31 @@ You can select the columns returned by the query using a select clause.
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
tbl.search(np.random.random((1536))).select(["vector"]).to_pandas()
|
||||
```
|
||||
```python
|
||||
tbl.search(np.random.random((1536))).select(["vector"]).to_pandas()
|
||||
```
|
||||
|
||||
|
||||
```text
|
||||
vector _distance
|
||||
0 [0.30928212, 0.022668175, 0.1756372, 0.4911822... 93.971092
|
||||
1 [0.2525465, 0.01723831, 0.261568, 0.002007689,... 95.173485
|
||||
...
|
||||
```
|
||||
```text
|
||||
vector _distance
|
||||
0 [0.30928212, 0.022668175, 0.1756372, 0.4911822... 93.971092
|
||||
1 [0.2525465, 0.01723831, 0.261568, 0.002007689,... 95.173485
|
||||
...
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "TypeScript"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/ann_indexes.ts:search3"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/ann_indexes.ts:search3"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/ann_indexes.ts:search3"
|
||||
```
|
||||
|
||||
## FAQ
|
||||
|
||||
|
||||
@@ -16,12 +16,43 @@
|
||||
pip install lancedb
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```shell
|
||||
npm install vectordb
|
||||
```
|
||||
```shell
|
||||
npm install @lancedb/lancedb
|
||||
```
|
||||
!!! note "Bundling `@lancedb/lancedb` apps with Webpack"
|
||||
|
||||
Since LanceDB contains a prebuilt Node binary, you must configure `next.config.js` to exclude it from webpack. This is required for both using Next.js and deploying a LanceDB app on Vercel.
|
||||
|
||||
```javascript
|
||||
/** @type {import('next').NextConfig} */
|
||||
module.exports = ({
|
||||
webpack(config) {
|
||||
config.externals.push({ '@lancedb/lancedb': '@lancedb/lancedb' })
|
||||
return config;
|
||||
}
|
||||
})
|
||||
```
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```shell
|
||||
npm install vectordb
|
||||
```
|
||||
!!! note "Bundling `vectordb` apps with Webpack"
|
||||
|
||||
Since LanceDB contains a prebuilt Node binary, you must configure `next.config.js` to exclude it from webpack. This is required for both using Next.js and deploying a LanceDB app on Vercel.
|
||||
|
||||
```javascript
|
||||
/** @type {import('next').NextConfig} */
|
||||
module.exports = ({
|
||||
webpack(config) {
|
||||
config.externals.push({ vectordb: 'vectordb' })
|
||||
return config;
|
||||
}
|
||||
})
|
||||
```
|
||||
=== "Rust"
|
||||
|
||||
```shell
|
||||
@@ -58,14 +89,21 @@ recommend switching to stable releases.
|
||||
pip install --pre --extra-index-url https://pypi.fury.io/lancedb/ lancedb
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```shell
|
||||
npm install vectordb@preview
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```shell
|
||||
npm install @lancedb/lancedb@preview
|
||||
```
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```shell
|
||||
npm install vectordb@preview
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
|
||||
We don't push preview releases to crates.io, but you can referent the tag
|
||||
in GitHub within your Cargo dependencies:
|
||||
|
||||
@@ -93,23 +131,22 @@ recommend switching to stable releases.
|
||||
use the same syntax as the asynchronous API. To help with this migration we
|
||||
have created a [migration guide](migration.md) detailing the differences.
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:import"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
--8<-- "docs/src/basic_legacy.ts:open_db"
|
||||
```
|
||||
```typescript
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import * as arrow from "apache-arrow";
|
||||
|
||||
!!! note "`@lancedb/lancedb` vs. `vectordb`"
|
||||
--8<-- "nodejs/examples/basic.ts:connect"
|
||||
```
|
||||
|
||||
The Javascript SDK was originally released as `vectordb`. In an effort to
|
||||
reduce maintenance we are aligning our SDKs. The new, aligned, Javascript
|
||||
API is being released as `lancedb`. If you are starting new work we encourage
|
||||
you to try out `lancedb`. Once the new API is feature complete we will begin
|
||||
slowly deprecating `vectordb` in favor of `lancedb`. There is a
|
||||
[migration guide](migration.md) detailing the differences which will assist
|
||||
you in this process.
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:open_db"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -152,15 +189,23 @@ table.
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_table"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
If the table already exists, LanceDB will raise an error by default.
|
||||
If you want to overwrite the table, you can pass in `mode="overwrite"`
|
||||
to the `createTable` function.
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_table"
|
||||
```
|
||||
|
||||
If the table already exists, LanceDB will raise an error by default.
|
||||
If you want to overwrite the table, you can pass in `mode:"overwrite"`
|
||||
to the `createTable` function.
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -200,11 +245,19 @@ similar to a `CREATE TABLE` statement in SQL.
|
||||
!!! note "You can define schema in Pydantic"
|
||||
LanceDB comes with Pydantic support, which allows you to define the schema of your data using Pydantic models. This makes it easy to work with LanceDB tables and data. Learn more about all supported types in [tables guide](./guides/tables.md).
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_empty_table"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_empty_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_empty_table"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -223,11 +276,19 @@ Once created, you can open a table as follows:
|
||||
--8<-- "python/python/tests/docs/test_basic.py:open_table_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:open_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
const tbl = await db.openTable("myTable");
|
||||
```
|
||||
|
||||
```typescript
|
||||
const tbl = await db.openTable("myTable");
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -244,11 +305,18 @@ If you forget the name of your table, you can always get a listing of all table
|
||||
--8<-- "python/python/tests/docs/test_basic.py:table_names_async"
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```javascript
|
||||
console.log(await db.tableNames());
|
||||
```
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:table_names"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
console.log(await db.tableNames());
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -267,11 +335,18 @@ After a table has been created, you can always add more data to it as follows:
|
||||
--8<-- "python/python/tests/docs/test_basic.py:add_data_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:add"
|
||||
```
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:add_data"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:add"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -292,11 +367,18 @@ Once you've embedded the query, you can find its nearest neighbors as follows:
|
||||
|
||||
This returns a pandas DataFrame with the results.
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:search"
|
||||
```
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:vector_search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:search"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -325,11 +407,18 @@ LanceDB allows you to create an ANN index on a table as follows:
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_index_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```{.typescript .ignore}
|
||||
--8<-- "docs/src/basic_legacy.ts:create_index"
|
||||
```
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_index"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```{.typescript .ignore}
|
||||
--8<-- "docs/src/basic_legacy.ts:create_index"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -357,11 +446,19 @@ This can delete any number of rows that match the filter.
|
||||
--8<-- "python/python/tests/docs/test_basic.py:delete_rows_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:delete"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:delete_rows"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:delete"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -378,9 +475,15 @@ simple or complex as needed. To see what expressions are supported, see the
|
||||
|
||||
Read more: [lancedb.table.Table.delete][]
|
||||
|
||||
=== "Javascript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
Read more: [vectordb.Table.delete](javascript/interfaces/Table.md#delete)
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
Read more: [lancedb.Table.delete](javascript/interfaces/Table.md#delete)
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
Read more: [vectordb.Table.delete](javascript/interfaces/Table.md#delete)
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -392,23 +495,31 @@ Use the `drop_table()` method on the database to remove a table.
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:drop_table"
|
||||
--8<-- "python/python/tests/docs/test_basic.py:drop_table_async"
|
||||
```
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:drop_table"
|
||||
--8<-- "python/python/tests/docs/test_basic.py:drop_table_async"
|
||||
```
|
||||
|
||||
This permanently removes the table and is not recoverable, unlike deleting rows.
|
||||
By default, if the table does not exist an exception is raised. To suppress this,
|
||||
you can pass in `ignore_missing=True`.
|
||||
This permanently removes the table and is not recoverable, unlike deleting rows.
|
||||
By default, if the table does not exist an exception is raised. To suppress this,
|
||||
you can pass in `ignore_missing=True`.
|
||||
|
||||
=== "Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:drop_table"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
This permanently removes the table and is not recoverable, unlike deleting rows.
|
||||
If the table does not exist an exception is raised.
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:drop_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:drop_table"
|
||||
```
|
||||
|
||||
This permanently removes the table and is not recoverable, unlike deleting rows.
|
||||
If the table does not exist an exception is raised.
|
||||
|
||||
=== "Rust"
|
||||
|
||||
@@ -416,19 +527,6 @@ Use the `drop_table()` method on the database to remove a table.
|
||||
--8<-- "rust/lancedb/examples/simple.rs:drop_table"
|
||||
```
|
||||
|
||||
!!! note "Bundling `vectordb` apps with Webpack"
|
||||
|
||||
If you're using the `vectordb` module in JavaScript, since LanceDB contains a prebuilt Node binary, you must configure `next.config.js` to exclude it from webpack. This is required for both using Next.js and deploying a LanceDB app on Vercel.
|
||||
|
||||
```javascript
|
||||
/** @type {import('next').NextConfig} */
|
||||
module.exports = ({
|
||||
webpack(config) {
|
||||
config.externals.push({ vectordb: 'vectordb' })
|
||||
return config;
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
## Using the Embedding API
|
||||
You can use the embedding API when working with embedding models. It automatically vectorizes the data at ingestion and query time and comes with built-in integrations with popular embedding models like Openai, Hugging Face, Sentence Transformers, CLIP and more.
|
||||
@@ -440,6 +538,22 @@ You can use the embedding API when working with embedding models. It automatical
|
||||
--8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings"
|
||||
```
|
||||
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/embedding.ts:imports"
|
||||
--8<-- "nodejs/examples/embedding.ts:openai_embeddings"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
--8<-- "rust/lancedb/examples/openai.rs:imports"
|
||||
--8<-- "rust/lancedb/examples/openai.rs:openai_embeddings"
|
||||
```
|
||||
|
||||
Learn about using the existing integrations and creating custom embedding functions in the [embedding API guide](./embeddings/).
|
||||
|
||||
|
||||
@@ -448,3 +562,5 @@ Learn about using the existing integrations and creating custom embedding functi
|
||||
This section covered the very basics of using LanceDB. If you're learning about vector databases for the first time, you may want to read the page on [indexing](concepts/index_ivfpq.md) to get familiar with the concepts.
|
||||
|
||||
If you've already worked with other vector databases, you may want to read the [guides](guides/tables.md) to learn how to work with LanceDB in more detail.
|
||||
|
||||
[^1]: The `vectordb` package is a legacy package that is deprecated in favor of `@lancedb/lancedb`. The `vectordb` package will continue to receive bug fixes and security updates until September 2024. We recommend all new projects use `@lancedb/lancedb`. See the [migration guide](migration.md) for more information.
|
||||
|
||||
@@ -24,6 +24,7 @@ const example = async () => {
|
||||
);
|
||||
// --8<-- [end:create_table]
|
||||
|
||||
|
||||
// --8<-- [start:add]
|
||||
const newData = Array.from({ length: 500 }, (_, i) => ({
|
||||
vector: [i, i + 1],
|
||||
|
||||
@@ -427,6 +427,45 @@ Usage Example:
|
||||
tbl.add(data)
|
||||
```
|
||||
|
||||
### Jina Embeddings
|
||||
Jina embeddings are used to generate embeddings for text and image data.
|
||||
You also need to set the `JINA_API_KEY` environment variable to use the Jina API.
|
||||
|
||||
You can find a list of supported models under [https://jina.ai/embeddings/](https://jina.ai/embeddings/)
|
||||
|
||||
Supported parameters (to be passed in `create` method) are:
|
||||
|
||||
| Parameter | Type | Default Value | Description |
|
||||
|---|---|---|---|
|
||||
| `name` | `str` | `"jina-clip-v1"` | The model ID of the jina model to use |
|
||||
|
||||
Usage Example:
|
||||
|
||||
```python
|
||||
import os
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||
|
||||
os.environ['JINA_API_KEY'] = 'jina_*'
|
||||
|
||||
jina_embed = EmbeddingFunctionRegistry.get_instance().get("jina").create(name="jina-embeddings-v2-base-en")
|
||||
|
||||
|
||||
class TextModel(LanceModel):
|
||||
text: str = jina_embed.SourceField()
|
||||
vector: Vector(jina_embed.ndims()) = jina_embed.VectorField()
|
||||
|
||||
|
||||
data = [{"text": "hello world"},
|
||||
{"text": "goodbye world"}]
|
||||
|
||||
db = lancedb.connect("~/.lancedb-2")
|
||||
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
|
||||
|
||||
tbl.add(data)
|
||||
```
|
||||
|
||||
### AWS Bedrock Text Embedding Functions
|
||||
AWS Bedrock supports multiple base models for generating text embeddings. You need to setup the AWS credentials to use this embedding function.
|
||||
You can do so by using `awscli` and also add your session_token:
|
||||
@@ -524,7 +563,7 @@ uris = [
|
||||
# get each uri as bytes
|
||||
image_bytes = [requests.get(uri).content for uri in uris]
|
||||
table.add(
|
||||
[{"label": labels, "image_uri": uris, "image_bytes": image_bytes}]
|
||||
pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": image_bytes})
|
||||
)
|
||||
```
|
||||
Now we can search using text from both the default vector column and the custom vector column
|
||||
@@ -630,3 +669,54 @@ print(actual.text == "bird")
|
||||
```
|
||||
|
||||
If you have any questions about the embeddings API, supported models, or see a relevant model missing, please raise an issue [on GitHub](https://github.com/lancedb/lancedb/issues).
|
||||
|
||||
### Jina Embeddings
|
||||
Jina embeddings can also be used to embed both text and image data, only some of the models support image data and you can check the list
|
||||
under [https://jina.ai/embeddings/](https://jina.ai/embeddings/)
|
||||
|
||||
Supported parameters (to be passed in `create` method) are:
|
||||
|
||||
| Parameter | Type | Default Value | Description |
|
||||
|---|---|---|---|
|
||||
| `name` | `str` | `"jina-clip-v1"` | The model ID of the jina model to use |
|
||||
|
||||
Usage Example:
|
||||
|
||||
```python
|
||||
import os
|
||||
import requests
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import get_registry
|
||||
import pandas as pd
|
||||
|
||||
os.environ['JINA_API_KEY'] = 'jina_*'
|
||||
|
||||
db = lancedb.connect("~/.lancedb")
|
||||
func = get_registry().get("jina").create()
|
||||
|
||||
|
||||
class Images(LanceModel):
|
||||
label: str
|
||||
image_uri: str = func.SourceField() # image uri as the source
|
||||
image_bytes: bytes = func.SourceField() # image bytes as the source
|
||||
vector: Vector(func.ndims()) = func.VectorField() # vector column
|
||||
vec_from_bytes: Vector(func.ndims()) = func.VectorField() # Another vector column
|
||||
|
||||
|
||||
table = db.create_table("images", schema=Images)
|
||||
labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
|
||||
uris = [
|
||||
"http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
|
||||
"http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
|
||||
"http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
|
||||
"http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
|
||||
"http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
|
||||
"http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
|
||||
]
|
||||
# get each uri as bytes
|
||||
image_bytes = [requests.get(uri).content for uri in uris]
|
||||
table.add(
|
||||
pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": image_bytes})
|
||||
)
|
||||
```
|
||||
@@ -6,8 +6,8 @@ For this purpose, LanceDB introduces an **embedding functions API**, that allow
|
||||
LanceDB Cloud does not support embedding functions yet. You need to generate embeddings before ingesting into the table or querying.
|
||||
|
||||
!!! warning
|
||||
Using the embedding function registry means that you don't have to explicitly generate the embeddings yourself.
|
||||
However, if your embedding function changes, you'll have to re-configure your table with the new embedding function
|
||||
Using the embedding function registry means that you don't have to explicitly generate the embeddings yourself.
|
||||
However, if your embedding function changes, you'll have to re-configure your table with the new embedding function
|
||||
and regenerate the embeddings. In the future, we plan to support the ability to change the embedding function via
|
||||
table metadata and have LanceDB automatically take care of regenerating the embeddings.
|
||||
|
||||
@@ -16,7 +16,7 @@ For this purpose, LanceDB introduces an **embedding functions API**, that allow
|
||||
|
||||
=== "Python"
|
||||
In the LanceDB python SDK, we define a global embedding function registry with
|
||||
many different embedding models and even more coming soon.
|
||||
many different embedding models and even more coming soon.
|
||||
Here's let's an implementation of CLIP as example.
|
||||
|
||||
```python
|
||||
@@ -26,20 +26,35 @@ For this purpose, LanceDB introduces an **embedding functions API**, that allow
|
||||
clip = registry.get("open-clip").create()
|
||||
```
|
||||
|
||||
You can also define your own embedding function by implementing the `EmbeddingFunction`
|
||||
You can also define your own embedding function by implementing the `EmbeddingFunction`
|
||||
abstract base interface. It subclasses Pydantic Model which can be utilized to write complex schemas simply as we'll see next!
|
||||
|
||||
=== "JavaScript""
|
||||
=== "TypeScript"
|
||||
In the TypeScript SDK, the choices are more limited. For now, only the OpenAI
|
||||
embedding function is available.
|
||||
|
||||
```javascript
|
||||
const lancedb = require("vectordb");
|
||||
import * as lancedb from '@lancedb/lancedb'
|
||||
import { getRegistry } from '@lancedb/lancedb/embeddings'
|
||||
|
||||
// You need to provide an OpenAI API key
|
||||
const apiKey = "sk-..."
|
||||
// The embedding function will create embeddings for the 'text' column
|
||||
const embedding = new lancedb.OpenAIEmbeddingFunction('text', apiKey)
|
||||
const func = getRegistry().get("openai").create({apiKey})
|
||||
```
|
||||
=== "Rust"
|
||||
In the Rust SDK, the choices are more limited. For now, only the OpenAI
|
||||
embedding function is available. But unlike the Python and TypeScript SDKs, you need manually register the OpenAI embedding function.
|
||||
|
||||
```toml
|
||||
// Make sure to include the `openai` feature
|
||||
[dependencies]
|
||||
lancedb = {version = "*", features = ["openai"]}
|
||||
```
|
||||
|
||||
```rust
|
||||
--8<-- "rust/lancedb/examples/openai.rs:imports"
|
||||
--8<-- "rust/lancedb/examples/openai.rs:openai_embeddings"
|
||||
```
|
||||
|
||||
## 2. Define the data model or schema
|
||||
@@ -55,14 +70,14 @@ For this purpose, LanceDB introduces an **embedding functions API**, that allow
|
||||
|
||||
`VectorField` tells LanceDB to use the clip embedding function to generate query embeddings for the `vector` column and `SourceField` ensures that when adding data, we automatically use the specified embedding function to encode `image_uri`.
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
For the TypeScript SDK, a schema can be inferred from input data, or an explicit
|
||||
Arrow schema can be provided.
|
||||
|
||||
## 3. Create table and add data
|
||||
|
||||
Now that we have chosen/defined our embedding function and the schema,
|
||||
Now that we have chosen/defined our embedding function and the schema,
|
||||
we can create the table and ingest data without needing to explicitly generate
|
||||
the embeddings at all:
|
||||
|
||||
@@ -74,17 +89,26 @@ the embeddings at all:
|
||||
table.add([{"image_uri": u} for u in uris])
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
const data = [
|
||||
{ text: "pepperoni"},
|
||||
{ text: "pineapple"}
|
||||
]
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
const table = await db.createTable("vectors", data, embedding)
|
||||
```
|
||||
```ts
|
||||
--8<-- "nodejs/examples/embedding.ts:imports"
|
||||
--8<-- "nodejs/examples/embedding.ts:embedding_function"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
const data = [
|
||||
{ text: "pepperoni"},
|
||||
{ text: "pineapple"}
|
||||
]
|
||||
|
||||
const table = await db.createTable("vectors", data, embedding)
|
||||
```
|
||||
|
||||
## 4. Querying your table
|
||||
Not only can you forget about the embeddings during ingestion, you also don't
|
||||
@@ -97,8 +121,8 @@ need to worry about it when you query the table:
|
||||
```python
|
||||
results = (
|
||||
table.search("dog")
|
||||
.limit(10)
|
||||
.to_pandas()
|
||||
.limit(10)
|
||||
.to_pandas()
|
||||
)
|
||||
```
|
||||
|
||||
@@ -109,22 +133,32 @@ need to worry about it when you query the table:
|
||||
query_image = Image.open(p)
|
||||
results = (
|
||||
table.search(query_image)
|
||||
.limit(10)
|
||||
.to_pandas()
|
||||
.limit(10)
|
||||
.to_pandas()
|
||||
)
|
||||
```
|
||||
|
||||
Both of the above snippet returns a pandas DataFrame with the 10 closest vectors to the query.
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
const results = await table.search("What's the best pizza topping?")
|
||||
.limit(10)
|
||||
.toArray()
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)
|
||||
|
||||
```ts
|
||||
const results = await table
|
||||
.search("What's the best pizza topping?")
|
||||
.limit(10)
|
||||
.execute()
|
||||
```
|
||||
|
||||
```javascript
|
||||
const results = await table
|
||||
.search("What's the best pizza topping?")
|
||||
.limit(10)
|
||||
.execute()
|
||||
```
|
||||
|
||||
The above snippet returns an array of records with the top 10 nearest neighbors to the query.
|
||||
|
||||
---
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
Due to the nature of vector embeddings, they can be used to represent any kind of data, from text to images to audio.
|
||||
This makes them a very powerful tool for machine learning practitioners.
|
||||
However, there's no one-size-fits-all solution for generating embeddings - there are many different libraries and APIs
|
||||
Due to the nature of vector embeddings, they can be used to represent any kind of data, from text to images to audio.
|
||||
This makes them a very powerful tool for machine learning practitioners.
|
||||
However, there's no one-size-fits-all solution for generating embeddings - there are many different libraries and APIs
|
||||
(both commercial and open source) that can be used to generate embeddings from structured/unstructured data.
|
||||
|
||||
LanceDB supports 3 methods of working with embeddings.
|
||||
|
||||
1. You can manually generate embeddings for the data and queries. This is done outside of LanceDB.
|
||||
2. You can use the built-in [embedding functions](./embedding_functions.md) to embed the data and queries in the background.
|
||||
3. For python users, you can define your own [custom embedding function](./custom_embedding_function.md)
|
||||
3. You can define your own [custom embedding function](./custom_embedding_function.md)
|
||||
that extends the default embedding functions.
|
||||
|
||||
For python users, there is also a legacy [with_embeddings API](./legacy.md).
|
||||
@@ -18,62 +18,89 @@ It is retained for compatibility and will be removed in a future version.
|
||||
To get started with embeddings, you can use the built-in embedding functions.
|
||||
|
||||
### OpenAI Embedding function
|
||||
|
||||
LanceDB registers the OpenAI embeddings function in the registry as `openai`. You can pass any supported model name to the `create`. By default it uses `"text-embedding-ada-002"`.
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import get_registry
|
||||
=== "Python"
|
||||
|
||||
db = lancedb.connect("/tmp/db")
|
||||
func = get_registry().get("openai").create(name="text-embedding-ada-002")
|
||||
```python
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import get_registry
|
||||
|
||||
class Words(LanceModel):
|
||||
text: str = func.SourceField()
|
||||
vector: Vector(func.ndims()) = func.VectorField()
|
||||
db = lancedb.connect("/tmp/db")
|
||||
func = get_registry().get("openai").create(name="text-embedding-ada-002")
|
||||
|
||||
table = db.create_table("words", schema=Words, mode="overwrite")
|
||||
table.add(
|
||||
[
|
||||
{"text": "hello world"},
|
||||
{"text": "goodbye world"}
|
||||
]
|
||||
)
|
||||
class Words(LanceModel):
|
||||
text: str = func.SourceField()
|
||||
vector: Vector(func.ndims()) = func.VectorField()
|
||||
|
||||
query = "greetings"
|
||||
actual = table.search(query).limit(1).to_pydantic(Words)[0]
|
||||
print(actual.text)
|
||||
```
|
||||
table = db.create_table("words", schema=Words, mode="overwrite")
|
||||
table.add(
|
||||
[
|
||||
{"text": "hello world"},
|
||||
{"text": "goodbye world"}
|
||||
]
|
||||
)
|
||||
|
||||
query = "greetings"
|
||||
actual = table.search(query).limit(1).to_pydantic(Words)[0]
|
||||
print(actual.text)
|
||||
```
|
||||
|
||||
=== "TypeScript"
|
||||
|
||||
```typescript
|
||||
--8<--- "nodejs/examples/embedding.ts:imports"
|
||||
--8<--- "nodejs/examples/embedding.ts:openai_embeddings"
|
||||
```
|
||||
|
||||
=== "Rust"
|
||||
|
||||
```rust
|
||||
--8<--- "rust/lancedb/examples/openai.rs:imports"
|
||||
--8<--- "rust/lancedb/examples/openai.rs:openai_embeddings"
|
||||
```
|
||||
|
||||
### Sentence Transformers Embedding function
|
||||
LanceDB registers the Sentence Transformers embeddings function in the registry as `sentence-transformers`. You can pass any supported model name to the `create`. By default it uses `"sentence-transformers/paraphrase-MiniLM-L6-v2"`.
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import get_registry
|
||||
=== "Python"
|
||||
```python
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import get_registry
|
||||
|
||||
db = lancedb.connect("/tmp/db")
|
||||
model = get_registry().get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu")
|
||||
db = lancedb.connect("/tmp/db")
|
||||
model = get_registry().get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu")
|
||||
|
||||
class Words(LanceModel):
|
||||
text: str = model.SourceField()
|
||||
vector: Vector(model.ndims()) = model.VectorField()
|
||||
class Words(LanceModel):
|
||||
text: str = model.SourceField()
|
||||
vector: Vector(model.ndims()) = model.VectorField()
|
||||
|
||||
table = db.create_table("words", schema=Words)
|
||||
table.add(
|
||||
[
|
||||
{"text": "hello world"},
|
||||
{"text": "goodbye world"}
|
||||
]
|
||||
)
|
||||
table = db.create_table("words", schema=Words)
|
||||
table.add(
|
||||
[
|
||||
{"text": "hello world"},
|
||||
{"text": "goodbye world"}
|
||||
]
|
||||
)
|
||||
|
||||
query = "greetings"
|
||||
actual = table.search(query).limit(1).to_pydantic(Words)[0]
|
||||
print(actual.text)
|
||||
```
|
||||
query = "greetings"
|
||||
actual = table.search(query).limit(1).to_pydantic(Words)[0]
|
||||
print(actual.text)
|
||||
```
|
||||
|
||||
=== "TypeScript"
|
||||
|
||||
Coming Soon!
|
||||
|
||||
=== "Rust"
|
||||
|
||||
Coming Soon!
|
||||
|
||||
### Jina Embeddings
|
||||
|
||||
LanceDB registers the JinaAI embeddings function in the registry as `jina`. You can pass any supported model name to the `create`. By default it uses `"jina-clip-v1"`.
|
||||
`jina-clip-v1` can handle both text and images and other models only support `text`.
|
||||
|
||||
@@ -104,4 +131,4 @@ table.add(
|
||||
query = "greetings"
|
||||
actual = table.search(query).limit(1).to_pydantic(Words)[0]
|
||||
print(actual.text)
|
||||
```
|
||||
```
|
||||
|
||||
@@ -32,28 +32,54 @@ LanceDB OSS supports object stores such as AWS S3 (and compatible stores), Azure
|
||||
db = lancedb.connect("az://bucket/path")
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
AWS S3:
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
```
|
||||
AWS S3:
|
||||
|
||||
Google Cloud Storage:
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
```
|
||||
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("gs://bucket/path");
|
||||
```
|
||||
Google Cloud Storage:
|
||||
|
||||
Azure Blob Storage:
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect("gs://bucket/path");
|
||||
```
|
||||
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("az://bucket/path");
|
||||
```
|
||||
Azure Blob Storage:
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect("az://bucket/path");
|
||||
```
|
||||
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
AWS S3:
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
```
|
||||
|
||||
Google Cloud Storage:
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("gs://bucket/path");
|
||||
```
|
||||
|
||||
Azure Blob Storage:
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("az://bucket/path");
|
||||
```
|
||||
|
||||
In most cases, when running in the respective cloud and permissions are set up correctly, no additional configuration is required. When running outside of the respective cloud, authentication credentials must be provided. Credentials and other configuration options can be set in two ways: first, by setting environment variables. And second, by passing a `storage_options` object to the `connect` function. For example, to increase the request timeout to 60 seconds, you can set the `TIMEOUT` environment variable to `60s`:
|
||||
|
||||
@@ -78,13 +104,26 @@ If you only want this to apply to one particular connection, you can pass the `s
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("s3://bucket/path",
|
||||
{storageOptions: {timeout: "60s"}});
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
const db = await lancedb.connect("s3://bucket/path", {
|
||||
storageOptions: {timeout: "60s"}
|
||||
});
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("s3://bucket/path", {
|
||||
storageOptions: {timeout: "60s"}
|
||||
});
|
||||
```
|
||||
|
||||
Getting even more specific, you can set the `timeout` for only a particular table:
|
||||
|
||||
@@ -101,18 +140,33 @@ Getting even more specific, you can set the `timeout` for only a particular tabl
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
<!-- skip-test -->
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
const table = db.createTable(
|
||||
"table",
|
||||
[{ a: 1, b: 2}],
|
||||
{storageOptions: {timeout: "60s"}}
|
||||
);
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
<!-- skip-test -->
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
const table = db.createTable(
|
||||
"table",
|
||||
[{ a: 1, b: 2}],
|
||||
{storageOptions: {timeout: "60s"}}
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
<!-- skip-test -->
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect("s3://bucket/path");
|
||||
const table = db.createTable(
|
||||
"table",
|
||||
[{ a: 1, b: 2}],
|
||||
{storageOptions: {timeout: "60s"}}
|
||||
);
|
||||
```
|
||||
|
||||
!!! info "Storage option casing"
|
||||
|
||||
@@ -135,7 +189,6 @@ There are several options that can be set for all object stores, mostly related
|
||||
| `proxy_ca_certificate` | PEM-formatted CA certificate for proxy connections. |
|
||||
| `proxy_excludes` | List of hosts that bypass the proxy. This is a comma-separated list of domains and IP masks. Any subdomain of the provided domain will be bypassed. For example, `example.com, 192.168.1.0/24` would bypass `https://api.example.com`, `https://www.example.com`, and any IP in the range `192.168.1.0/24`. |
|
||||
|
||||
|
||||
### AWS S3
|
||||
|
||||
To configure credentials for AWS S3, you can use the `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` keys. Region can also be set, but it is not mandatory when using AWS.
|
||||
@@ -155,21 +208,39 @@ These can be set as environment variables or passed in the `storage_options` par
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"s3://bucket/path",
|
||||
{
|
||||
storageOptions: {
|
||||
awsAccessKeyId: "my-access-key",
|
||||
awsSecretAccessKey: "my-secret-key",
|
||||
awsSessionToken: "my-session-token",
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect(
|
||||
"s3://bucket/path",
|
||||
{
|
||||
storageOptions: {
|
||||
awsAccessKeyId: "my-access-key",
|
||||
awsSecretAccessKey: "my-secret-key",
|
||||
awsSessionToken: "my-session-token",
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"s3://bucket/path",
|
||||
{
|
||||
storageOptions: {
|
||||
awsAccessKeyId: "my-access-key",
|
||||
awsSecretAccessKey: "my-secret-key",
|
||||
awsSessionToken: "my-session-token",
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
Alternatively, if you are using AWS SSO, you can use the `AWS_PROFILE` and `AWS_DEFAULT_REGION` environment variables.
|
||||
|
||||
@@ -188,7 +259,6 @@ The following keys can be used as both environment variables or keys in the `sto
|
||||
| `aws_sse_kms_key_id` | The KMS key ID to use for server-side encryption. If set, `aws_server_side_encryption` must be `"aws:kms"` or `"aws:kms:dsse"`. |
|
||||
| `aws_sse_bucket_key_enabled` | Whether to use bucket keys for server-side encryption. |
|
||||
|
||||
|
||||
!!! tip "Automatic cleanup for failed writes"
|
||||
|
||||
LanceDB uses [multi-part uploads](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html) when writing data to S3 in order to maximize write speed. LanceDB will abort these uploads when it shuts down gracefully, such as when cancelled by keyboard interrupt. However, in the rare case that LanceDB crashes, it is possible that some data will be left lingering in your account. To cleanup this data, we recommend (as AWS themselves do) that you setup a lifecycle rule to delete in-progress uploads after 7 days. See the AWS guide:
|
||||
@@ -384,20 +454,37 @@ LanceDB can also connect to S3-compatible stores, such as MinIO. To do so, you m
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"s3://bucket/path",
|
||||
{
|
||||
storageOptions: {
|
||||
region: "us-east-1",
|
||||
endpoint: "http://minio:9000",
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect(
|
||||
"s3://bucket/path",
|
||||
{
|
||||
storageOptions: {
|
||||
region: "us-east-1",
|
||||
endpoint: "http://minio:9000",
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"s3://bucket/path",
|
||||
{
|
||||
storageOptions: {
|
||||
region: "us-east-1",
|
||||
endpoint: "http://minio:9000",
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
This can also be done with the ``AWS_ENDPOINT`` and ``AWS_DEFAULT_REGION`` environment variables.
|
||||
|
||||
@@ -428,21 +515,37 @@ To configure LanceDB to use an S3 Express endpoint, you must set the storage opt
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"s3://my-bucket--use1-az4--x-s3/path",
|
||||
{
|
||||
storageOptions: {
|
||||
region: "us-east-1",
|
||||
s3Express: "true",
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect(
|
||||
"s3://my-bucket--use1-az4--x-s3/path",
|
||||
{
|
||||
storageOptions: {
|
||||
region: "us-east-1",
|
||||
s3Express: "true",
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"s3://my-bucket--use1-az4--x-s3/path",
|
||||
{
|
||||
storageOptions: {
|
||||
region: "us-east-1",
|
||||
s3Express: "true",
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
### Google Cloud Storage
|
||||
|
||||
@@ -461,26 +564,40 @@ GCS credentials are configured by setting the `GOOGLE_SERVICE_ACCOUNT` environme
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"gs://my-bucket/my-database",
|
||||
{
|
||||
storageOptions: {
|
||||
serviceAccount: "path/to/service-account.json",
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect(
|
||||
"gs://my-bucket/my-database",
|
||||
{
|
||||
storageOptions: {
|
||||
serviceAccount: "path/to/service-account.json",
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"gs://my-bucket/my-database",
|
||||
{
|
||||
storageOptions: {
|
||||
serviceAccount: "path/to/service-account.json",
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
!!! info "HTTP/2 support"
|
||||
|
||||
By default, GCS uses HTTP/1 for communication, as opposed to HTTP/2. This improves maximum throughput significantly. However, if you wish to use HTTP/2 for some reason, you can set the environment variable `HTTP1_ONLY` to `false`.
|
||||
|
||||
|
||||
The following keys can be used as both environment variables or keys in the `storage_options` parameter:
|
||||
<!-- source: https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html -->
|
||||
|
||||
@@ -490,7 +607,6 @@ The following keys can be used as both environment variables or keys in the `sto
|
||||
| ``google_service_account_key`` | The serialized service account key. |
|
||||
| ``google_application_credentials`` | Path to the application credentials. |
|
||||
|
||||
|
||||
### Azure Blob Storage
|
||||
|
||||
Azure Blob Storage credentials can be configured by setting the `AZURE_STORAGE_ACCOUNT_NAME`and `AZURE_STORAGE_ACCOUNT_KEY` environment variables. Alternatively, you can pass the account name and key in the `storage_options` parameter:
|
||||
@@ -509,20 +625,37 @@ Azure Blob Storage credentials can be configured by setting the `AZURE_STORAGE_A
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"az://my-container/my-database",
|
||||
{
|
||||
storageOptions: {
|
||||
accountName: "some-account",
|
||||
accountKey: "some-key",
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
const db = await lancedb.connect(
|
||||
"az://my-container/my-database",
|
||||
{
|
||||
storageOptions: {
|
||||
accountName: "some-account",
|
||||
accountKey: "some-key",
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
const lancedb = require("lancedb");
|
||||
const db = await lancedb.connect(
|
||||
"az://my-container/my-database",
|
||||
{
|
||||
storageOptions: {
|
||||
accountName: "some-account",
|
||||
accountKey: "some-key",
|
||||
}
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
These keys can be used as both environment variables or keys in the `storage_options` parameter:
|
||||
|
||||
@@ -547,4 +680,4 @@ These keys can be used as both environment variables or keys in the `storage_opt
|
||||
| ``azure_use_azure_cli`` | Use azure cli for acquiring access token. |
|
||||
| ``azure_disable_tagging`` | Disables tagging objects. This can be desirable if not supported by the backing store. |
|
||||
|
||||
<!-- TODO: demonstrate how to configure networked file systems for optimal performance -->
|
||||
<!-- TODO: demonstrate how to configure networked file systems for optimal performance -->
|
||||
|
||||
@@ -3,32 +3,45 @@
|
||||
|
||||
A Table is a collection of Records in a LanceDB Database. Tables in Lance have a schema that defines the columns and their types. These schemas can include nested columns and can evolve over time.
|
||||
|
||||
This guide will show how to create tables, insert data into them, and update the data.
|
||||
This guide will show how to create tables, insert data into them, and update the data.
|
||||
|
||||
|
||||
## Creating a LanceDB Table
|
||||
|
||||
Initialize a LanceDB connection and create a table
|
||||
|
||||
=== "Python"
|
||||
Initialize a LanceDB connection and create a table using one of the many methods listed below.
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
db = lancedb.connect("./.lancedb")
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
|
||||
Initialize a VectorDB connection and create a table using one of the many methods listed below.
|
||||
|
||||
```javascript
|
||||
const lancedb = require("vectordb");
|
||||
|
||||
const uri = "data/sample-lancedb";
|
||||
const db = await lancedb.connect(uri);
|
||||
```
|
||||
|
||||
LanceDB allows ingesting data from various sources - `dict`, `list[dict]`, `pd.DataFrame`, `pa.Table` or a `Iterator[pa.RecordBatch]`. Let's take a look at some of the these.
|
||||
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import * as arrow from "apache-arrow";
|
||||
|
||||
const uri = "data/sample-lancedb";
|
||||
const db = await lancedb.connect(uri);
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
const lancedb = require("vectordb");
|
||||
|
||||
const uri = "data/sample-lancedb";
|
||||
const db = await lancedb.connect(uri);
|
||||
```
|
||||
|
||||
|
||||
|
||||
### From list of tuples or dictionaries
|
||||
|
||||
=== "Python"
|
||||
@@ -45,74 +58,104 @@ This guide will show how to create tables, insert data into them, and update the
|
||||
|
||||
db["my_table"].head()
|
||||
```
|
||||
|
||||
!!! info "Note"
|
||||
If the table already exists, LanceDB will raise an error by default.
|
||||
If the table already exists, LanceDB will raise an error by default.
|
||||
|
||||
`create_table` supports an optional `exist_ok` parameter. When set to True
|
||||
and the table exists, then it simply opens the existing table. The data you
|
||||
passed in will NOT be appended to the table in that case.
|
||||
|
||||
```python
|
||||
db.create_table("name", data, exist_ok=True)
|
||||
```
|
||||
|
||||
Sometimes you want to make sure that you start fresh. If you want to
|
||||
overwrite the table, you can pass in mode="overwrite" to the createTable function.
|
||||
|
||||
```python
|
||||
db.create_table("name", data, mode="overwrite")
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
You can create a LanceDB table in JavaScript using an array of JSON records as follows.
|
||||
|
||||
```javascript
|
||||
const tb = await db.createTable("my_table", [{
|
||||
"vector": [3.1, 4.1],
|
||||
"item": "foo",
|
||||
"price": 10.0
|
||||
}, {
|
||||
"vector": [5.9, 26.5],
|
||||
"item": "bar",
|
||||
"price": 20.0
|
||||
}]);
|
||||
```
|
||||
!!! info "Note"
|
||||
If the table already exists, LanceDB will raise an error by default. If you want to overwrite the table, you need to specify the `WriteMode` in the createTable function.
|
||||
|
||||
```javascript
|
||||
const table = await con.createTable(tableName, data, { writeMode: WriteMode.Overwrite })
|
||||
```python
|
||||
db.create_table("name", data, exist_ok=True)
|
||||
```
|
||||
|
||||
### From a Pandas DataFrame
|
||||
Sometimes you want to make sure that you start fresh. If you want to
|
||||
overwrite the table, you can pass in mode="overwrite" to the createTable function.
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
|
||||
data = pd.DataFrame({
|
||||
"vector": [[1.1, 1.2, 1.3, 1.4], [0.2, 1.8, 0.4, 3.6]],
|
||||
"lat": [45.5, 40.1],
|
||||
"long": [-122.7, -74.1]
|
||||
})
|
||||
|
||||
db.create_table("my_table", data)
|
||||
|
||||
db["my_table"].head()
|
||||
db.create_table("name", data, mode="overwrite")
|
||||
```
|
||||
!!! info "Note"
|
||||
|
||||
=== "Typescript[^1]"
|
||||
You can create a LanceDB table in JavaScript using an array of records as follows.
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table"
|
||||
```
|
||||
|
||||
This will infer the schema from the provided data. If you want to explicitly provide a schema, you can use `apache-arrow` to declare a schema
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table_with_schema"
|
||||
```
|
||||
|
||||
!!! info "Note"
|
||||
`createTable` supports an optional `existsOk` parameter. When set to true
|
||||
and the table exists, then it simply opens the existing table. The data you
|
||||
passed in will NOT be appended to the table in that case.
|
||||
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table_exists_ok"
|
||||
```
|
||||
|
||||
Sometimes you want to make sure that you start fresh. If you want to
|
||||
overwrite the table, you can pass in mode: "overwrite" to the createTable function.
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/basic.ts:create_table_overwrite"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
--8<-- "docs/src/basic_legacy.ts:create_table"
|
||||
```
|
||||
|
||||
!!! warning
|
||||
`existsOk` option is not supported in `vectordb`
|
||||
|
||||
Sometimes you want to make sure that you start fresh. If you want to
|
||||
overwrite the table, you can pass in mode: "overwrite" to the createTable function.
|
||||
|
||||
```ts
|
||||
const table = await con.createTable(tableName, data, { writeMode: WriteMode.Overwrite })
|
||||
```
|
||||
|
||||
### From a Pandas DataFrame
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
|
||||
data = pd.DataFrame({
|
||||
"vector": [[1.1, 1.2, 1.3, 1.4], [0.2, 1.8, 0.4, 3.6]],
|
||||
"lat": [45.5, 40.1],
|
||||
"long": [-122.7, -74.1]
|
||||
})
|
||||
|
||||
db.create_table("my_table", data)
|
||||
|
||||
db["my_table"].head()
|
||||
```
|
||||
|
||||
!!! info "Note"
|
||||
Data is converted to Arrow before being written to disk. For maximum control over how data is saved, either provide the PyArrow schema to convert to or else provide a PyArrow Table directly.
|
||||
|
||||
The **`vector`** column needs to be a [Vector](../python/pydantic.md#vector-field) (defined as [pyarrow.FixedSizeList](https://arrow.apache.org/docs/python/generated/pyarrow.list_.html)) type.
|
||||
The **`vector`** column needs to be a [Vector](../python/pydantic.md#vector-field) (defined as [pyarrow.FixedSizeList](https://arrow.apache.org/docs/python/generated/pyarrow.list_.html)) type.
|
||||
|
||||
```python
|
||||
custom_schema = pa.schema([
|
||||
pa.field("vector", pa.list_(pa.float32(), 4)),
|
||||
pa.field("lat", pa.float32()),
|
||||
pa.field("long", pa.float32())
|
||||
])
|
||||
```python
|
||||
custom_schema = pa.schema([
|
||||
pa.field("vector", pa.list_(pa.float32(), 4)),
|
||||
pa.field("lat", pa.float32()),
|
||||
pa.field("long", pa.float32())
|
||||
])
|
||||
|
||||
table = db.create_table("my_table", data, schema=custom_schema)
|
||||
```
|
||||
table = db.create_table("my_table", data, schema=custom_schema)
|
||||
```
|
||||
|
||||
### From a Polars DataFrame
|
||||
|
||||
@@ -133,14 +176,15 @@ table = db.create_table("pl_table", data=data)
|
||||
```
|
||||
|
||||
### From an Arrow Table
|
||||
You can also create LanceDB tables directly from Arrow tables.
|
||||
LanceDB supports float16 data type!
|
||||
|
||||
=== "Python"
|
||||
You can also create LanceDB tables directly from Arrow tables.
|
||||
LanceDB supports float16 data type!
|
||||
|
||||
```python
|
||||
import pyarrows as pa
|
||||
import numpy as np
|
||||
|
||||
|
||||
dim = 16
|
||||
total = 2
|
||||
schema = pa.schema(
|
||||
@@ -160,13 +204,19 @@ table = db.create_table("pl_table", data=data)
|
||||
tbl = db.create_table("f16_tbl", data, schema=schema)
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
You can also create LanceDB tables directly from Arrow tables.
|
||||
LanceDB supports Float16 data type!
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```javascript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_f16_table"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_f16_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_f16_table"
|
||||
```
|
||||
|
||||
### From Pydantic Models
|
||||
|
||||
@@ -225,7 +275,7 @@ class NestedSchema(LanceModel):
|
||||
tbl = db.create_table("nested_table", schema=NestedSchema, mode="overwrite")
|
||||
```
|
||||
|
||||
This creates a struct column called "document" that has two subfields
|
||||
This creates a struct column called "document" that has two subfields
|
||||
called "content" and "source":
|
||||
|
||||
```
|
||||
@@ -236,7 +286,7 @@ vector: fixed_size_list<item: float>[1536] not null
|
||||
child 0, item: float
|
||||
document: struct<content: string not null, source: string not null> not null
|
||||
child 0, content: string not null
|
||||
child 1, source: string not null
|
||||
child 1, source: string not null
|
||||
```
|
||||
|
||||
#### Validators
|
||||
@@ -261,7 +311,7 @@ class TestModel(LanceModel):
|
||||
@classmethod
|
||||
def tz_must_match(cls, dt: datetime) -> datetime:
|
||||
assert dt.tzinfo == tz
|
||||
return dt
|
||||
return dt
|
||||
|
||||
ok = TestModel(dt_with_tz=datetime.now(tz))
|
||||
|
||||
@@ -329,23 +379,24 @@ You can also use iterators of other types like Pandas DataFrame or Pylists direc
|
||||
tbl = db.open_table("my_table")
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
If you forget the name of your table, you can always get a listing of all table names.
|
||||
|
||||
```javascript
|
||||
```typescript
|
||||
console.log(await db.tableNames());
|
||||
```
|
||||
|
||||
Then, you can open any existing tables.
|
||||
|
||||
```javascript
|
||||
```typescript
|
||||
const tbl = await db.openTable("my_table");
|
||||
```
|
||||
|
||||
## Creating empty table
|
||||
You can create an empty table for scenarios where you want to add data to the table later. An example would be when you want to collect data from a stream/external file and then add it to a table in batches.
|
||||
|
||||
=== "Python"
|
||||
In Python, you can create an empty table for scenarios where you want to add data to the table later. An example would be when you want to collect data from a stream/external file and then add it to a table in batches.
|
||||
|
||||
```python
|
||||
|
||||
@@ -364,8 +415,8 @@ You can also use iterators of other types like Pandas DataFrame or Pylists direc
|
||||
tbl = db.create_table("empty_table_add", schema=schema)
|
||||
```
|
||||
|
||||
Alternatively, you can also use Pydantic to specify the schema for the empty table. Note that we do not
|
||||
directly import `pydantic` but instead use `lancedb.pydantic` which is a subclass of `pydantic.BaseModel`
|
||||
Alternatively, you can also use Pydantic to specify the schema for the empty table. Note that we do not
|
||||
directly import `pydantic` but instead use `lancedb.pydantic` which is a subclass of `pydantic.BaseModel`
|
||||
that has been extended to support LanceDB specific types like `Vector`.
|
||||
|
||||
```python
|
||||
@@ -382,9 +433,23 @@ You can also use iterators of other types like Pandas DataFrame or Pylists direc
|
||||
|
||||
Once the empty table has been created, you can add data to it via the various methods listed in the [Adding to a table](#adding-to-a-table) section.
|
||||
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/basic.ts:create_empty_table"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:create_empty_table"
|
||||
```
|
||||
|
||||
## Adding to a table
|
||||
|
||||
After a table has been created, you can always add more data to it using the various methods available.
|
||||
After a table has been created, you can always add more data to it usind the `add` method
|
||||
|
||||
=== "Python"
|
||||
You can add any of the valid data structures accepted by LanceDB table, i.e, `dict`, `list[dict]`, `pd.DataFrame`, or `Iterator[pa.RecordBatch]`. Below are some examples.
|
||||
@@ -472,9 +537,7 @@ After a table has been created, you can always add more data to it using the var
|
||||
tbl.add(models)
|
||||
```
|
||||
|
||||
|
||||
|
||||
=== "JavaScript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```javascript
|
||||
await tbl.add(
|
||||
@@ -530,15 +593,15 @@ Use the `delete()` method on tables to delete rows from a table. To choose which
|
||||
# 0 3 [5.0, 6.0]
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
await tbl.delete('item = "fizz"')
|
||||
```
|
||||
|
||||
### Deleting row with specific column value
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const con = await lancedb.connect("./.lancedb")
|
||||
const data = [
|
||||
{id: 1, vector: [1, 2]},
|
||||
@@ -552,7 +615,7 @@ Use the `delete()` method on tables to delete rows from a table. To choose which
|
||||
|
||||
### Delete from a list of values
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const to_remove = [1, 5];
|
||||
await tbl.delete(`id IN (${to_remove.join(",")})`)
|
||||
await tbl.countRows() // Returns 1
|
||||
@@ -609,26 +672,49 @@ This can be used to update zero to all rows depending on how many rows match the
|
||||
2 2 [10.0, 10.0]
|
||||
```
|
||||
|
||||
=== "JavaScript/Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
API Reference: [vectordb.Table.update](../javascript/interfaces/Table.md/#update)
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```javascript
|
||||
const lancedb = require("vectordb");
|
||||
API Reference: [lancedb.Table.update](../js/classes/Table.md/#update)
|
||||
|
||||
const db = await lancedb.connect("./.lancedb");
|
||||
```ts
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
const data = [
|
||||
{x: 1, vector: [1, 2]},
|
||||
{x: 2, vector: [3, 4]},
|
||||
{x: 3, vector: [5, 6]},
|
||||
];
|
||||
const tbl = await db.createTable("my_table", data)
|
||||
const db = await lancedb.connect("./.lancedb");
|
||||
|
||||
await tbl.update({ where: "x = 2", values: {vector: [10, 10]} })
|
||||
```
|
||||
const data = [
|
||||
{x: 1, vector: [1, 2]},
|
||||
{x: 2, vector: [3, 4]},
|
||||
{x: 3, vector: [5, 6]},
|
||||
];
|
||||
const tbl = await db.createTable("my_table", data)
|
||||
|
||||
The `values` parameter is used to provide the new values for the columns as literal values. You can also use the `values_sql` / `valuesSql` parameter to provide SQL expressions for the new values. For example, you can use `values_sql="x + 1"` to increment the value of the `x` column by 1.
|
||||
await tbl.update({vector: [10, 10]}, { where: "x = 2"})
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
API Reference: [vectordb.Table.update](../javascript/interfaces/Table.md/#update)
|
||||
|
||||
```ts
|
||||
const lancedb = require("vectordb");
|
||||
|
||||
const db = await lancedb.connect("./.lancedb");
|
||||
|
||||
const data = [
|
||||
{x: 1, vector: [1, 2]},
|
||||
{x: 2, vector: [3, 4]},
|
||||
{x: 3, vector: [5, 6]},
|
||||
];
|
||||
const tbl = await db.createTable("my_table", data)
|
||||
|
||||
await tbl.update({ where: "x = 2", values: {vector: [10, 10]} })
|
||||
```
|
||||
|
||||
#### Updating using a sql query
|
||||
|
||||
The `values` parameter is used to provide the new values for the columns as literal values. You can also use the `values_sql` / `valuesSql` parameter to provide SQL expressions for the new values. For example, you can use `values_sql="x + 1"` to increment the value of the `x` column by 1.
|
||||
|
||||
=== "Python"
|
||||
|
||||
@@ -647,11 +733,17 @@ The `values` parameter is used to provide the new values for the columns as lite
|
||||
2 3 [10.0, 10.0]
|
||||
```
|
||||
|
||||
=== "JavaScript/Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
```javascript
|
||||
await tbl.update({ valuesSql: { x: "x + 1" } })
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
Coming Soon!
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
await tbl.update({ valuesSql: { x: "x + 1" } })
|
||||
```
|
||||
|
||||
!!! info "Note"
|
||||
|
||||
@@ -672,7 +764,7 @@ Use the `drop_table()` method on the database to remove a table.
|
||||
By default, if the table does not exist an exception is raised. To suppress this,
|
||||
you can pass in `ignore_missing=True`.
|
||||
|
||||
=== "Javascript/Typescript"
|
||||
=== "TypeScript"
|
||||
|
||||
```typescript
|
||||
--8<-- "docs/src/basic_legacy.ts:drop_table"
|
||||
@@ -697,7 +789,7 @@ There are three possible settings for `read_consistency_interval`:
|
||||
This is only tune-able in LanceDB OSS. In LanceDB Cloud, readers are always eventually consistent.
|
||||
|
||||
=== "Python"
|
||||
|
||||
|
||||
To set strong consistency, use `timedelta(0)`:
|
||||
|
||||
```python
|
||||
@@ -719,33 +811,35 @@ There are three possible settings for `read_consistency_interval`:
|
||||
```python
|
||||
db = lancedb.connect("./.lancedb")
|
||||
table = db.open_table("my_table")
|
||||
|
||||
|
||||
# (Other writes happen to my_table from another process)
|
||||
|
||||
# Check for updates
|
||||
table.checkout_latest()
|
||||
```
|
||||
|
||||
=== "JavaScript/Typescript"
|
||||
=== "Typescript[^1]"
|
||||
|
||||
To set strong consistency, use `0`:
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const db = await lancedb.connect({ uri: "./.lancedb", readConsistencyInterval: 0 });
|
||||
const table = await db.openTable("my_table");
|
||||
```
|
||||
|
||||
For eventual consistency, specify the update interval as seconds:
|
||||
|
||||
```javascript
|
||||
```ts
|
||||
const db = await lancedb.connect({ uri: "./.lancedb", readConsistencyInterval: 5 });
|
||||
const table = await db.openTable("my_table");
|
||||
```
|
||||
|
||||
<!-- Node doesn't yet support the version time travel: https://github.com/lancedb/lancedb/issues/1007
|
||||
<!-- Node doesn't yet support the version time travel: https://github.com/lancedb/lancedb/issues/1007
|
||||
Once it does, we can show manual consistency check for Node as well.
|
||||
-->
|
||||
|
||||
## What's next?
|
||||
|
||||
Learn the best practices on creating an ANN index and getting the most out of it.
|
||||
Learn the best practices on creating an ANN index and getting the most out of it.
|
||||
|
||||
[^1]: The `vectordb` package is a legacy package that is deprecated in favor of `@lancedb/lancedb`. The `vectordb` package will continue to receive bug fixes and security updates until September 2024. We recommend all new projects use `@lancedb/lancedb`. See the [migration guide](migration.md) for more information.
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
## Improving retriever performance
|
||||
|
||||
Try it yourself - <a href="https://colab.research.google.com/github/lancedb/lancedb/blob/main/docs/src/notebooks/lancedb_reranking.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a><br/>
|
||||
|
||||
VectorDBs are used as retreivers in recommender or chatbot-based systems for retrieving relevant data based on user queries. For example, retriever is a critical component of Retrieval Augmented Generation (RAG) acrhitectures. In this section, we will discuss how to improve the performance of retrievers.
|
||||
|
||||
There are serveral ways to improve the performance of retrievers. Some of the common techniques are:
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
Continuing from the previous example, we can now rerank the results using more complex rerankers.
|
||||
Continuing from the previous section, we can now rerank the results using more complex rerankers.
|
||||
|
||||
Try it yourself - <a href="https://colab.research.google.com/github/lancedb/lancedb/blob/main/docs/src/notebooks/lancedb_reranking.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a><br/>
|
||||
|
||||
## Reranking search results
|
||||
You can rerank any search results using a reranker. The syntax for reranking is as follows:
|
||||
|
||||
82
docs/src/guides/tuning_retrievers/3_embed_tuning.md
Normal file
82
docs/src/guides/tuning_retrievers/3_embed_tuning.md
Normal file
@@ -0,0 +1,82 @@
|
||||
## Finetuning the Embedding Model
|
||||
Try it yourself - <a href="https://colab.research.google.com/github/lancedb/lancedb/blob/main/docs/src/notebooks/embedding_tuner.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a><br/>
|
||||
|
||||
Another way to improve retriever performance is to fine-tune the embedding model itself. Fine-tuning the embedding model can help in learning better representations for the documents and queries in the dataset. This can be particularly useful when the dataset is very different from the pre-trained data used to train the embedding model.
|
||||
|
||||
We'll use the same dataset as in the previous sections. Start off by splitting the dataset into training and validation sets:
|
||||
```python
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
train_df, validation_df = train_test_split("data_qa.csv", test_size=0.2, random_state=42)
|
||||
|
||||
train_df.to_csv("data_train.csv", index=False)
|
||||
validation_df.to_csv("data_val.csv", index=False)
|
||||
```
|
||||
|
||||
You can use any tuning API to fine-tune embedding models. In this example, we'll utilise Llama-index as it also comes with utilities for synthetic data generation and training the model.
|
||||
|
||||
|
||||
Then parse the dataset as llama-index text nodes and generate synthetic QA pairs from each node.
|
||||
```python
|
||||
from llama_index.core.node_parser import SentenceSplitter
|
||||
from llama_index.readers.file import PagedCSVReader
|
||||
from llama_index.finetuning import generate_qa_embedding_pairs
|
||||
from llama_index.core.evaluation import EmbeddingQAFinetuneDataset
|
||||
|
||||
def load_corpus(file):
|
||||
loader = PagedCSVReader(encoding="utf-8")
|
||||
docs = loader.load_data(file=Path(file))
|
||||
|
||||
parser = SentenceSplitter()
|
||||
nodes = parser.get_nodes_from_documents(docs)
|
||||
|
||||
return nodes
|
||||
|
||||
from llama_index.llms.openai import OpenAI
|
||||
|
||||
|
||||
train_dataset = generate_qa_embedding_pairs(
|
||||
llm=OpenAI(model="gpt-3.5-turbo"), nodes=train_nodes, verbose=False
|
||||
)
|
||||
val_dataset = generate_qa_embedding_pairs(
|
||||
llm=OpenAI(model="gpt-3.5-turbo"), nodes=val_nodes, verbose=False
|
||||
)
|
||||
```
|
||||
|
||||
Now we'll use `SentenceTransformersFinetuneEngine` engine to fine-tune the model. You can also use `sentence-transformers` or `transformers` library to fine-tune the model.
|
||||
|
||||
```python
|
||||
from llama_index.finetuning import SentenceTransformersFinetuneEngine
|
||||
|
||||
finetune_engine = SentenceTransformersFinetuneEngine(
|
||||
train_dataset,
|
||||
model_id="BAAI/bge-small-en-v1.5",
|
||||
model_output_path="tuned_model",
|
||||
val_dataset=val_dataset,
|
||||
)
|
||||
finetune_engine.finetune()
|
||||
embed_model = finetune_engine.get_finetuned_model()
|
||||
```
|
||||
This saves the fine tuned embedding model in `tuned_model` folder. This al
|
||||
|
||||
# Evaluation results
|
||||
In order to eval the retriever, you can either use this model to ingest the data into LanceDB directly or llama-index's LanceDB integration to create a `VectorStoreIndex` and use it as a retriever.
|
||||
On performing the same hit-rate evaluation as before, we see a significant improvement in the hit-rate across all query types.
|
||||
|
||||
### Baseline
|
||||
| Query Type | Hit-rate@5 |
|
||||
| --- | --- |
|
||||
| Vector Search | 0.640 |
|
||||
| Full-text Search | 0.595 |
|
||||
| Reranked Vector Search | 0.677 |
|
||||
| Reranked Full-text Search | 0.672 |
|
||||
| Hybrid Search (w/ CohereReranker) | 0.759|
|
||||
|
||||
### Fine-tuned model ( 2 iterations )
|
||||
| Query Type | Hit-rate@5 |
|
||||
| --- | --- |
|
||||
| Vector Search | 0.672 |
|
||||
| Full-text Search | 0.595 |
|
||||
| Reranked Vector Search | 0.754 |
|
||||
| Reranked Full-text Search | 0.672|
|
||||
| Hybrid Search (w/ CohereReranker) | 0.768 |
|
||||
@@ -9,7 +9,8 @@ around the asynchronous client.
|
||||
This guide describes the differences between the two APIs and will hopefully assist users
|
||||
that would like to migrate to the new API.
|
||||
|
||||
## Closeable Connections
|
||||
## Python
|
||||
### Closeable Connections
|
||||
|
||||
The Connection now has a `close` method. You can call this when
|
||||
you are done with the connection to eagerly free resources. Currently
|
||||
@@ -32,20 +33,20 @@ async def my_async_fn():
|
||||
It is not mandatory to call the `close` method. If you do not call it
|
||||
then the connection will be closed when the object is garbage collected.
|
||||
|
||||
## Closeable Table
|
||||
### Closeable Table
|
||||
|
||||
The Table now also has a `close` method, similar to the connection. This
|
||||
can be used to eagerly free the cache used by a Table object. Similar to
|
||||
the connection, it can be used as a context manager and it is not mandatory
|
||||
to call the `close` method.
|
||||
|
||||
### Changes to Table APIs
|
||||
#### Changes to Table APIs
|
||||
|
||||
- Previously `Table.schema` was a property. Now it is an async method.
|
||||
- The method `Table.__len__` was removed and `len(table)` will no longer
|
||||
work. Use `Table.count_rows` instead.
|
||||
|
||||
### Creating Indices
|
||||
#### Creating Indices
|
||||
|
||||
The `Table.create_index` method is now used for creating both vector indices
|
||||
and scalar indices. It currently requires a column name to be specified (the
|
||||
@@ -55,12 +56,12 @@ the size of the data.
|
||||
To specify index configuration details you will need to specify which kind of
|
||||
index you are using.
|
||||
|
||||
### Querying
|
||||
#### Querying
|
||||
|
||||
The `Table.search` method has been renamed to `AsyncTable.vector_search` for
|
||||
clarity.
|
||||
|
||||
## Features not yet supported
|
||||
### Features not yet supported
|
||||
|
||||
The following features are not yet supported by the asynchronous API. However,
|
||||
we plan to support them soon.
|
||||
@@ -74,3 +75,22 @@ we plan to support them soon.
|
||||
search
|
||||
- Remote connections to LanceDb Cloud are not yet supported.
|
||||
- The method Table.head is not yet supported.
|
||||
|
||||
## TypeScript/JavaScript
|
||||
|
||||
For JS/TS users, we offer a brand new SDK [@lancedb/lancedb](https://www.npmjs.com/package/@lancedb/lancedb)
|
||||
|
||||
### Changes to Table APIs
|
||||
|
||||
Previously `Table.schema` was a property. Now it is an async method.
|
||||
|
||||
|
||||
#### Creating Indices
|
||||
|
||||
The `Table.createIndex` method is now used for creating both vector indices
|
||||
and scalar indices. It currently requires a column name to be specified (the
|
||||
column to index). Vector index defaults are now smarter and scale better with
|
||||
the size of the data.
|
||||
|
||||
To specify index configuration details you will need to specify which kind of
|
||||
index you are using.
|
||||
|
||||
1437
docs/src/notebooks/embedding_tuner.ipynb
Normal file
1437
docs/src/notebooks/embedding_tuner.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
@@ -6,7 +6,7 @@
|
||||
"id": "b3Y3DOVqtIbc"
|
||||
},
|
||||
"source": [
|
||||
"# Example walkthrough\n",
|
||||
"# Example - Improve Retrievers using Rerankers & Hybrid search\n",
|
||||
"\n",
|
||||
"## Optimizing RAG retrieval performance using hybrid search & reranking"
|
||||
]
|
||||
|
||||
@@ -15,7 +15,6 @@ LanceDB comes with some built-in rerankers. Some of the rerankers that are avail
|
||||
Using rerankers is optional for vector and FTS. However, for hybrid search, rerankers are required. To use a reranker, you need to create an instance of the reranker and pass it to the `rerank` method of the query builder.
|
||||
|
||||
```python
|
||||
import numpy
|
||||
import lancedb
|
||||
from lancedb.embeddings import get_registry
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
@@ -54,6 +53,7 @@ LanceDB comes with some built-in rerankers. Here are some of the rerankers that
|
||||
- [ColBERT Reranker](./colbert.md)
|
||||
- [OpenAI Reranker](./openai.md)
|
||||
- [Linear Combination Reranker](./linear_combination.md)
|
||||
- [Jina Reranker](./jina.md)
|
||||
|
||||
## Creating Custom Rerankers
|
||||
|
||||
|
||||
@@ -53,13 +53,24 @@ db.create_table("my_vectors", data=data)
|
||||
.to_list()
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
--8<-- "docs/src/search_legacy.ts:import"
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
--8<-- "docs/src/search_legacy.ts:search1"
|
||||
```
|
||||
```ts
|
||||
--8<-- "nodejs/examples/search.ts:import"
|
||||
|
||||
--8<-- "nodejs/examples/search.ts:search1"
|
||||
```
|
||||
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
--8<-- "docs/src/search_legacy.ts:import"
|
||||
|
||||
--8<-- "docs/src/search_legacy.ts:search1"
|
||||
```
|
||||
|
||||
By default, `l2` will be used as metric type. You can specify the metric type as
|
||||
`cosine` or `dot` if required.
|
||||
@@ -73,11 +84,19 @@ By default, `l2` will be used as metric type. You can specify the metric type as
|
||||
.to_list()
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
--8<-- "docs/src/search_legacy.ts:search2"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/search.ts:search2"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```javascript
|
||||
--8<-- "docs/src/search_legacy.ts:search2"
|
||||
```
|
||||
|
||||
## Approximate nearest neighbor (ANN) search
|
||||
|
||||
|
||||
@@ -44,11 +44,19 @@ const tbl = await db.createTable('myVectors', data)
|
||||
)
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
--8<-- "docs/src/sql_legacy.ts:search"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/filtering.ts:search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
--8<-- "docs/src/sql_legacy.ts:search"
|
||||
```
|
||||
|
||||
## SQL filters
|
||||
|
||||
@@ -78,11 +86,19 @@ For example, the following filter string is acceptable:
|
||||
.to_arrow()
|
||||
```
|
||||
|
||||
=== "Javascript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
--8<-- "docs/src/sql_legacy.ts:vec_search"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/filtering.ts:vec_search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
--8<-- "docs/src/sql_legacy.ts:vec_search"
|
||||
```
|
||||
|
||||
If your column name contains special characters or is a [SQL Keyword](https://docs.rs/sqlparser/latest/sqlparser/keywords/index.html),
|
||||
you can use backtick (`` ` ``) to escape it. For nested fields, each segment of the
|
||||
@@ -148,10 +164,18 @@ You can also filter your data without search.
|
||||
tbl.search().where("id = 10").limit(10).to_arrow()
|
||||
```
|
||||
|
||||
=== "JavaScript"
|
||||
=== "TypeScript"
|
||||
|
||||
```javascript
|
||||
--8<---- "docs/src/sql_legacy.ts:sql_search"
|
||||
```
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```ts
|
||||
--8<-- "nodejs/examples/filtering.ts:sql_search"
|
||||
```
|
||||
|
||||
=== "vectordb (deprecated)"
|
||||
|
||||
```ts
|
||||
--8<---- "docs/src/sql_legacy.ts:sql_search"
|
||||
```
|
||||
|
||||
!!!warning "If your table is large, this could potentially return a very large amount of data. Please be sure to use a `limit` clause unless you're sure you want to return the whole result set."
|
||||
|
||||
4
node/package-lock.json
generated
4
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.6.0",
|
||||
"version": "0.7.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.6.0",
|
||||
"version": "0.7.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.6.0",
|
||||
"version": "0.7.0",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
|
||||
@@ -62,6 +62,8 @@ export {
|
||||
|
||||
const defaultAwsRegion = "us-west-2";
|
||||
|
||||
const defaultRequestTimeout = 10_000
|
||||
|
||||
export interface AwsCredentials {
|
||||
accessKeyId: string
|
||||
|
||||
@@ -119,6 +121,11 @@ export interface ConnectionOptions {
|
||||
*/
|
||||
hostOverride?: string
|
||||
|
||||
/**
|
||||
* Duration in milliseconds for request timeout. Default = 10,000 (10 seconds)
|
||||
*/
|
||||
timeout?: number
|
||||
|
||||
/**
|
||||
* (For LanceDB OSS only): The interval, in seconds, at which to check for
|
||||
* updates to the table from other processes. If None, then consistency is not
|
||||
@@ -204,7 +211,8 @@ export async function connect(
|
||||
awsCredentials: undefined,
|
||||
awsRegion: defaultAwsRegion,
|
||||
apiKey: undefined,
|
||||
region: defaultAwsRegion
|
||||
region: defaultAwsRegion,
|
||||
timeout: defaultRequestTimeout
|
||||
},
|
||||
arg
|
||||
);
|
||||
|
||||
@@ -41,7 +41,7 @@ async function callWithMiddlewares (
|
||||
if (i > middlewares.length) {
|
||||
const headers = Object.fromEntries(req.headers.entries())
|
||||
const params = Object.fromEntries(req.params?.entries() ?? [])
|
||||
const timeout = 10000
|
||||
const timeout = opts?.timeout
|
||||
let res
|
||||
if (req.method === Method.POST) {
|
||||
res = await axios.post(
|
||||
@@ -82,6 +82,7 @@ async function callWithMiddlewares (
|
||||
|
||||
interface MiddlewareInvocationOptions {
|
||||
responseType?: ResponseType
|
||||
timeout?: number,
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -123,15 +124,19 @@ export class HttpLancedbClient {
|
||||
private readonly _url: string
|
||||
private readonly _apiKey: () => string
|
||||
private readonly _middlewares: HttpLancedbClientMiddleware[]
|
||||
private readonly _timeout: number | undefined
|
||||
|
||||
public constructor (
|
||||
url: string,
|
||||
apiKey: string,
|
||||
private readonly _dbName?: string
|
||||
timeout?: number,
|
||||
private readonly _dbName?: string,
|
||||
|
||||
) {
|
||||
this._url = url
|
||||
this._apiKey = () => apiKey
|
||||
this._middlewares = []
|
||||
this._timeout = timeout
|
||||
}
|
||||
|
||||
get uri (): string {
|
||||
@@ -230,7 +235,10 @@ export class HttpLancedbClient {
|
||||
|
||||
let response
|
||||
try {
|
||||
response = await callWithMiddlewares(req, this._middlewares, { responseType })
|
||||
response = await callWithMiddlewares(req, this._middlewares, {
|
||||
responseType,
|
||||
timeout: this._timeout,
|
||||
})
|
||||
|
||||
// return response
|
||||
} catch (err: any) {
|
||||
@@ -267,7 +275,7 @@ export class HttpLancedbClient {
|
||||
* Make a clone of this client
|
||||
*/
|
||||
private clone (): HttpLancedbClient {
|
||||
const clone = new HttpLancedbClient(this._url, this._apiKey(), this._dbName)
|
||||
const clone = new HttpLancedbClient(this._url, this._apiKey(), this._timeout, this._dbName)
|
||||
for (const mw of this._middlewares) {
|
||||
clone._middlewares.push(mw)
|
||||
}
|
||||
|
||||
@@ -72,6 +72,7 @@ export class RemoteConnection implements Connection {
|
||||
this._client = new HttpLancedbClient(
|
||||
server,
|
||||
opts.apiKey,
|
||||
opts.timeout,
|
||||
opts.hostOverride === undefined ? undefined : this._dbName
|
||||
)
|
||||
}
|
||||
|
||||
@@ -96,6 +96,50 @@ describe.each([arrow, arrowOld])("Given a table", (arrow: any) => {
|
||||
expect(await table.countRows("id == 10")).toBe(1);
|
||||
});
|
||||
|
||||
it("should let me update values with `values`", async () => {
|
||||
await table.add([{ id: 1 }]);
|
||||
expect(await table.countRows("id == 1")).toBe(1);
|
||||
expect(await table.countRows("id == 7")).toBe(0);
|
||||
await table.update({ values: { id: 7 } });
|
||||
expect(await table.countRows("id == 1")).toBe(0);
|
||||
expect(await table.countRows("id == 7")).toBe(1);
|
||||
await table.add([{ id: 2 }]);
|
||||
// Test Map as input
|
||||
await table.update({
|
||||
values: {
|
||||
id: "10",
|
||||
},
|
||||
where: "id % 2 == 0",
|
||||
});
|
||||
expect(await table.countRows("id == 2")).toBe(0);
|
||||
expect(await table.countRows("id == 7")).toBe(1);
|
||||
expect(await table.countRows("id == 10")).toBe(1);
|
||||
});
|
||||
|
||||
it("should let me update values with `valuesSql`", async () => {
|
||||
await table.add([{ id: 1 }]);
|
||||
expect(await table.countRows("id == 1")).toBe(1);
|
||||
expect(await table.countRows("id == 7")).toBe(0);
|
||||
await table.update({
|
||||
valuesSql: {
|
||||
id: "7",
|
||||
},
|
||||
});
|
||||
expect(await table.countRows("id == 1")).toBe(0);
|
||||
expect(await table.countRows("id == 7")).toBe(1);
|
||||
await table.add([{ id: 2 }]);
|
||||
// Test Map as input
|
||||
await table.update({
|
||||
valuesSql: {
|
||||
id: "10",
|
||||
},
|
||||
where: "id % 2 == 0",
|
||||
});
|
||||
expect(await table.countRows("id == 2")).toBe(0);
|
||||
expect(await table.countRows("id == 7")).toBe(1);
|
||||
expect(await table.countRows("id == 10")).toBe(1);
|
||||
});
|
||||
|
||||
// https://github.com/lancedb/lancedb/issues/1293
|
||||
test.each([new arrow.Float16(), new arrow.Float32(), new arrow.Float64()])(
|
||||
"can create empty table with non default float type: %s",
|
||||
|
||||
@@ -6,5 +6,5 @@
|
||||
"target": "es2022",
|
||||
"types": ["jest", "node"]
|
||||
},
|
||||
"include": ["**/*"]
|
||||
"include": ["**/*", "../examples/ann_indexes.ts"]
|
||||
}
|
||||
|
||||
28
nodejs/__test__/util.test.ts
Normal file
28
nodejs/__test__/util.test.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import { IntoSql, toSQL } from "../lancedb/util";
|
||||
test.each([
|
||||
["string", "'string'"],
|
||||
[123, "123"],
|
||||
[1.11, "1.11"],
|
||||
[true, "TRUE"],
|
||||
[false, "FALSE"],
|
||||
[null, "NULL"],
|
||||
[new Date("2021-01-01T00:00:00.000Z"), "'2021-01-01T00:00:00.000Z'"],
|
||||
[[1, 2, 3], "[1, 2, 3]"],
|
||||
[new ArrayBuffer(8), "X'0000000000000000'"],
|
||||
[Buffer.from("hello"), "X'68656c6c6f'"],
|
||||
["Hello 'world'", "'Hello ''world'''"],
|
||||
])("toSQL(%p) === %p", (value, expected) => {
|
||||
expect(toSQL(value)).toBe(expected);
|
||||
});
|
||||
|
||||
test("toSQL({}) throws on unsupported value type", () => {
|
||||
expect(() => toSQL({} as unknown as IntoSql)).toThrow(
|
||||
"Unsupported value type: object value: ([object Object])",
|
||||
);
|
||||
});
|
||||
test("toSQL() throws on unsupported value type", () => {
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
expect(() => (<any>toSQL)()).toThrow(
|
||||
"Unsupported value type: undefined value: (undefined)",
|
||||
);
|
||||
});
|
||||
@@ -94,7 +94,13 @@
|
||||
"useValidTypeof": "error"
|
||||
}
|
||||
},
|
||||
"ignore": ["**/dist/**/*", "**/native.js", "**/native.d.ts"]
|
||||
"ignore": [
|
||||
"**/dist/**/*",
|
||||
"**/native.js",
|
||||
"**/native.d.ts",
|
||||
"__test__/docs/**/*",
|
||||
"examples/**/*"
|
||||
]
|
||||
},
|
||||
"javascript": {
|
||||
"globals": []
|
||||
|
||||
1
nodejs/examples/.gitignore
vendored
Normal file
1
nodejs/examples/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
data/
|
||||
49
nodejs/examples/ann_indexes.ts
Normal file
49
nodejs/examples/ann_indexes.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
// --8<-- [start:import]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
// --8<-- [end:import]
|
||||
|
||||
// --8<-- [start:ingest]
|
||||
const db = await lancedb.connect("/tmp/lancedb/");
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: `${i}`,
|
||||
content: "",
|
||||
longId: `${i}`,
|
||||
}));
|
||||
|
||||
const table = await db.createTable("my_vectors", data, { mode: "overwrite" });
|
||||
await table.createIndex("vector", {
|
||||
config: lancedb.Index.ivfPq({
|
||||
numPartitions: 16,
|
||||
numSubVectors: 48,
|
||||
}),
|
||||
});
|
||||
// --8<-- [end:ingest]
|
||||
|
||||
// --8<-- [start:search1]
|
||||
const _results1 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.limit(2)
|
||||
.nprobes(20)
|
||||
.refineFactor(10)
|
||||
.toArray();
|
||||
// --8<-- [end:search1]
|
||||
|
||||
// --8<-- [start:search2]
|
||||
const _results2 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.where("id != '1141'")
|
||||
.limit(2)
|
||||
.toArray();
|
||||
// --8<-- [end:search2]
|
||||
|
||||
// --8<-- [start:search3]
|
||||
const _results3 = await table
|
||||
.search(Array(1536).fill(1.2))
|
||||
.select(["id"])
|
||||
.limit(2)
|
||||
.toArray();
|
||||
// --8<-- [end:search3]
|
||||
|
||||
console.log("Ann indexes: done");
|
||||
149
nodejs/examples/basic.ts
Normal file
149
nodejs/examples/basic.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
// --8<-- [start:imports]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import * as arrow from "apache-arrow";
|
||||
import { Field, FixedSizeList, Float16, Int32, Schema } from "apache-arrow";
|
||||
|
||||
// --8<-- [end:imports]
|
||||
|
||||
// --8<-- [start:connect]
|
||||
const uri = "/tmp/lancedb/";
|
||||
const db = await lancedb.connect(uri);
|
||||
// --8<-- [end:connect]
|
||||
{
|
||||
// --8<-- [start:create_table]
|
||||
const data = [
|
||||
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
||||
];
|
||||
const _tbl = await db.createTable("myTable", data);
|
||||
// --8<-- [end:create_table]
|
||||
{
|
||||
// --8<-- [start:create_table_exists_ok]
|
||||
const _tbl = await db.createTable("myTable", data, {
|
||||
existsOk: true,
|
||||
});
|
||||
// --8<-- [end:create_table_exists_ok]
|
||||
}
|
||||
{
|
||||
// --8<-- [start:create_table_overwrite]
|
||||
const _tbl = await db.createTable("myTable", data, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
// --8<-- [end:create_table_overwrite]
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:create_table_with_schema]
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field(
|
||||
"vector",
|
||||
new arrow.FixedSizeList(
|
||||
2,
|
||||
new arrow.Field("item", new arrow.Float32(), true),
|
||||
),
|
||||
),
|
||||
new arrow.Field("item", new arrow.Utf8(), true),
|
||||
new arrow.Field("price", new arrow.Float32(), true),
|
||||
]);
|
||||
const data = [
|
||||
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
|
||||
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
|
||||
];
|
||||
const _tbl = await db.createTable("myTable", data, {
|
||||
schema,
|
||||
});
|
||||
// --8<-- [end:create_table_with_schema]
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:create_empty_table]
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field(
|
||||
"vector",
|
||||
new arrow.FixedSizeList(
|
||||
2,
|
||||
new arrow.Field("item", new arrow.Float32(), true),
|
||||
),
|
||||
),
|
||||
]);
|
||||
const _tbl = await db.createEmptyTable("empty_table", schema);
|
||||
// --8<-- [end:create_empty_table]
|
||||
}
|
||||
{
|
||||
// --8<-- [start:open_table]
|
||||
const _tbl = await db.openTable("myTable");
|
||||
// --8<-- [end:open_table]
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:table_names]
|
||||
const tableNames = await db.tableNames();
|
||||
console.log(tableNames);
|
||||
// --8<-- [end:table_names]
|
||||
}
|
||||
|
||||
const tbl = await db.openTable("myTable");
|
||||
{
|
||||
// --8<-- [start:add_data]
|
||||
const data = [
|
||||
{ vector: [1.3, 1.4], item: "fizz", price: 100.0 },
|
||||
{ vector: [9.5, 56.2], item: "buzz", price: 200.0 },
|
||||
];
|
||||
await tbl.add(data);
|
||||
// --8<-- [end:add_data]
|
||||
}
|
||||
{
|
||||
// --8<-- [start:vector_search]
|
||||
const _res = tbl.search([100, 100]).limit(2).toArray();
|
||||
// --8<-- [end:vector_search]
|
||||
}
|
||||
{
|
||||
const data = Array.from({ length: 1000 })
|
||||
.fill(null)
|
||||
.map(() => ({
|
||||
vector: [Math.random(), Math.random()],
|
||||
item: "autogen",
|
||||
price: Math.round(Math.random() * 100),
|
||||
}));
|
||||
|
||||
await tbl.add(data);
|
||||
}
|
||||
|
||||
// --8<-- [start:create_index]
|
||||
await tbl.createIndex("vector");
|
||||
// --8<-- [end:create_index]
|
||||
|
||||
// --8<-- [start:delete_rows]
|
||||
await tbl.delete('item = "fizz"');
|
||||
// --8<-- [end:delete_rows]
|
||||
|
||||
// --8<-- [start:drop_table]
|
||||
await db.dropTable("myTable");
|
||||
// --8<-- [end:drop_table]
|
||||
await db.dropTable("empty_table");
|
||||
|
||||
{
|
||||
// --8<-- [start:create_f16_table]
|
||||
const db = await lancedb.connect("/tmp/lancedb");
|
||||
const dim = 16;
|
||||
const total = 10;
|
||||
const f16Schema = new Schema([
|
||||
new Field("id", new Int32()),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(dim, new Field("item", new Float16(), true)),
|
||||
false,
|
||||
),
|
||||
]);
|
||||
const data = lancedb.makeArrowTable(
|
||||
Array.from(Array(total), (_, i) => ({
|
||||
id: i,
|
||||
vector: Array.from(Array(dim), Math.random),
|
||||
})),
|
||||
{ schema: f16Schema },
|
||||
);
|
||||
const _table = await db.createTable("f16_tbl", data);
|
||||
// --8<-- [end:create_f16_table]
|
||||
await db.dropTable("f16_tbl");
|
||||
}
|
||||
83
nodejs/examples/embedding.ts
Normal file
83
nodejs/examples/embedding.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
// --8<-- [start:imports]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
|
||||
import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
|
||||
import { type Float, Float32, Utf8 } from "apache-arrow";
|
||||
// --8<-- [end:imports]
|
||||
|
||||
{
|
||||
// --8<-- [start:openai_embeddings]
|
||||
|
||||
const db = await lancedb.connect("/tmp/db");
|
||||
const func = getRegistry()
|
||||
.get("openai")
|
||||
?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;
|
||||
|
||||
const wordsSchema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
const tbl = await db.createEmptyTable("words", wordsSchema, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
|
||||
|
||||
const query = "greetings";
|
||||
const actual = (await (await tbl.search(query)).limit(1).toArray())[0];
|
||||
|
||||
// --8<-- [end:openai_embeddings]
|
||||
console.log("result = ", actual.text);
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:embedding_function]
|
||||
const db = await lancedb.connect("/tmp/db");
|
||||
|
||||
@register("my_embedding")
|
||||
class MyEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
embeddingDataType(): Float {
|
||||
return new Float32();
|
||||
}
|
||||
async computeQueryEmbeddings(_data: string) {
|
||||
// This is a placeholder for a real embedding function
|
||||
return [1, 2, 3];
|
||||
}
|
||||
async computeSourceEmbeddings(data: string[]) {
|
||||
// This is a placeholder for a real embedding function
|
||||
return Array.from({ length: data.length }).fill([1, 2, 3]) as number[][];
|
||||
}
|
||||
}
|
||||
|
||||
const func = new MyEmbeddingFunction();
|
||||
|
||||
const data = [{ text: "pepperoni" }, { text: "pineapple" }];
|
||||
|
||||
// Option 1: manually specify the embedding function
|
||||
const table = await db.createTable("vectors", data, {
|
||||
embeddingFunction: {
|
||||
function: func,
|
||||
sourceColumn: "text",
|
||||
vectorColumn: "vector",
|
||||
},
|
||||
mode: "overwrite",
|
||||
});
|
||||
|
||||
// Option 2: provide the embedding function through a schema
|
||||
|
||||
const schema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
|
||||
const table2 = await db.createTable("vectors2", data, {
|
||||
schema,
|
||||
mode: "overwrite",
|
||||
});
|
||||
// --8<-- [end:embedding_function]
|
||||
}
|
||||
34
nodejs/examples/filtering.ts
Normal file
34
nodejs/examples/filtering.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: i,
|
||||
item: `item ${i}`,
|
||||
strId: `${i}`,
|
||||
}));
|
||||
|
||||
const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
|
||||
|
||||
// --8<-- [start:search]
|
||||
const _result = await tbl
|
||||
.search(Array(1536).fill(0.5))
|
||||
.limit(1)
|
||||
.where("id = 10")
|
||||
.toArray();
|
||||
// --8<-- [end:search]
|
||||
|
||||
// --8<-- [start:vec_search]
|
||||
await tbl
|
||||
.search(Array(1536).fill(0))
|
||||
.where("(item IN ('item 0', 'item 2')) AND (id > 10)")
|
||||
.postfilter()
|
||||
.toArray();
|
||||
// --8<-- [end:vec_search]
|
||||
|
||||
// --8<-- [start:sql_search]
|
||||
await tbl.query().where("id = 10").limit(10).toArray();
|
||||
// --8<-- [end:sql_search]
|
||||
|
||||
console.log("SQL search: done");
|
||||
27
nodejs/examples/jsconfig.json
Normal file
27
nodejs/examples/jsconfig.json
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
// Enable latest features
|
||||
"lib": ["ESNext", "DOM"],
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleDetection": "force",
|
||||
"jsx": "react-jsx",
|
||||
"allowJs": true,
|
||||
|
||||
// Bundler mode
|
||||
"moduleResolution": "bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
"noEmit": true,
|
||||
|
||||
// Best practices
|
||||
"strict": true,
|
||||
"skipLibCheck": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
|
||||
// Some stricter flags (disabled by default)
|
||||
"noUnusedLocals": false,
|
||||
"noUnusedParameters": false,
|
||||
"noPropertyAccessFromIndexSignature": false
|
||||
}
|
||||
}
|
||||
79
nodejs/examples/package-lock.json
generated
Normal file
79
nodejs/examples/package-lock.json
generated
Normal file
@@ -0,0 +1,79 @@
|
||||
{
|
||||
"name": "examples",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "examples",
|
||||
"version": "1.0.0",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@lancedb/lancedb": "file:../"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5.0.0"
|
||||
}
|
||||
},
|
||||
"..": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.6.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
"os": [
|
||||
"darwin",
|
||||
"linux",
|
||||
"win32"
|
||||
],
|
||||
"dependencies": {
|
||||
"apache-arrow": "^15.0.0",
|
||||
"axios": "^1.7.2",
|
||||
"openai": "^4.29.2",
|
||||
"reflect-metadata": "^0.2.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@aws-sdk/client-kms": "^3.33.0",
|
||||
"@aws-sdk/client-s3": "^3.33.0",
|
||||
"@biomejs/biome": "^1.7.3",
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@napi-rs/cli": "^2.18.0",
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/jest": "^29.1.2",
|
||||
"@types/tmp": "^0.2.6",
|
||||
"apache-arrow-old": "npm:apache-arrow@13.0.0",
|
||||
"eslint": "^8.57.0",
|
||||
"jest": "^29.7.0",
|
||||
"shx": "^0.3.4",
|
||||
"tmp": "^0.2.3",
|
||||
"ts-jest": "^29.1.2",
|
||||
"typedoc": "^0.25.7",
|
||||
"typedoc-plugin-markdown": "^3.17.1",
|
||||
"typescript": "^5.3.3",
|
||||
"typescript-eslint": "^7.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/lancedb": {
|
||||
"resolved": "..",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.5.2",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.2.tgz",
|
||||
"integrity": "sha512-NcRtPEOsPFFWjobJEtfihkLCZCXZt/os3zf8nTxjVH3RvTSxjrCamJpbExGvYOF+tFHc3pA65qpdwPbzjohhew==",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
18
nodejs/examples/package.json
Normal file
18
nodejs/examples/package.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "examples",
|
||||
"version": "1.0.0",
|
||||
"description": "Examples for LanceDB",
|
||||
"main": "index.js",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "Lance Devs",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@lancedb/lancedb": "file:../"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5.0.0"
|
||||
}
|
||||
}
|
||||
37
nodejs/examples/search.ts
Normal file
37
nodejs/examples/search.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
// --8<-- [end:import]
|
||||
import * as fs from "node:fs";
|
||||
// --8<-- [start:import]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
async function setup() {
|
||||
fs.rmSync("data/sample-lancedb", { recursive: true, force: true });
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
|
||||
const data = Array.from({ length: 10_000 }, (_, i) => ({
|
||||
vector: Array(1536).fill(i),
|
||||
id: `${i}`,
|
||||
content: "",
|
||||
longId: `${i}`,
|
||||
}));
|
||||
|
||||
await db.createTable("my_vectors", data);
|
||||
}
|
||||
|
||||
await setup();
|
||||
|
||||
// --8<-- [start:search1]
|
||||
const db = await lancedb.connect("data/sample-lancedb");
|
||||
const tbl = await db.openTable("my_vectors");
|
||||
|
||||
const _results1 = await tbl.search(Array(1536).fill(1.2)).limit(10).toArray();
|
||||
// --8<-- [end:search1]
|
||||
|
||||
// --8<-- [start:search2]
|
||||
const _results2 = await tbl
|
||||
.search(Array(1536).fill(1.2))
|
||||
.distanceType("cosine")
|
||||
.limit(10)
|
||||
.toArray();
|
||||
// --8<-- [end:search2]
|
||||
|
||||
console.log("search: done");
|
||||
@@ -265,7 +265,11 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
|
||||
* @returns A Promise that resolves to a string containing the query execution plan explanation.
|
||||
*/
|
||||
async explainPlan(verbose = false): Promise<string> {
|
||||
return await this.inner.explainPlan(verbose);
|
||||
if (this.inner instanceof Promise) {
|
||||
return this.inner.then((inner) => inner.explainPlan(verbose));
|
||||
} else {
|
||||
return this.inner.explainPlan(verbose);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ import { IndexOptions } from "../indices";
|
||||
import { MergeInsertBuilder } from "../merge";
|
||||
import { VectorQuery } from "../query";
|
||||
import { AddDataOptions, Table, UpdateOptions } from "../table";
|
||||
import { IntoSql, toSQL } from "../util";
|
||||
import { RestfulLanceDBClient } from "./client";
|
||||
|
||||
export class RemoteTable extends Table {
|
||||
@@ -84,12 +85,66 @@ export class RemoteTable extends Table {
|
||||
}
|
||||
|
||||
async update(
|
||||
updates: Map<string, string> | Record<string, string>,
|
||||
optsOrUpdates:
|
||||
| (Map<string, string> | Record<string, string>)
|
||||
| ({
|
||||
values: Map<string, IntoSql> | Record<string, IntoSql>;
|
||||
} & Partial<UpdateOptions>)
|
||||
| ({
|
||||
valuesSql: Map<string, string> | Record<string, string>;
|
||||
} & Partial<UpdateOptions>),
|
||||
options?: Partial<UpdateOptions>,
|
||||
): Promise<void> {
|
||||
const isValues =
|
||||
"values" in optsOrUpdates && typeof optsOrUpdates.values !== "string";
|
||||
const isValuesSql =
|
||||
"valuesSql" in optsOrUpdates &&
|
||||
typeof optsOrUpdates.valuesSql !== "string";
|
||||
const isMap = (obj: unknown): obj is Map<string, string> => {
|
||||
return obj instanceof Map;
|
||||
};
|
||||
|
||||
let predicate;
|
||||
let columns: [string, string][];
|
||||
switch (true) {
|
||||
case isMap(optsOrUpdates):
|
||||
columns = Array.from(optsOrUpdates.entries());
|
||||
predicate = options?.where;
|
||||
break;
|
||||
case isValues && isMap(optsOrUpdates.values):
|
||||
columns = Array.from(optsOrUpdates.values.entries()).map(([k, v]) => [
|
||||
k,
|
||||
toSQL(v),
|
||||
]);
|
||||
predicate = optsOrUpdates.where;
|
||||
break;
|
||||
case isValues && !isMap(optsOrUpdates.values):
|
||||
columns = Object.entries(optsOrUpdates.values).map(([k, v]) => [
|
||||
k,
|
||||
toSQL(v),
|
||||
]);
|
||||
predicate = optsOrUpdates.where;
|
||||
break;
|
||||
|
||||
case isValuesSql && isMap(optsOrUpdates.valuesSql):
|
||||
columns = Array.from(optsOrUpdates.valuesSql.entries());
|
||||
predicate = optsOrUpdates.where;
|
||||
break;
|
||||
case isValuesSql && !isMap(optsOrUpdates.valuesSql):
|
||||
columns = Object.entries(optsOrUpdates.valuesSql).map(([k, v]) => [
|
||||
k,
|
||||
v,
|
||||
]);
|
||||
predicate = optsOrUpdates.where;
|
||||
break;
|
||||
default:
|
||||
columns = Object.entries(optsOrUpdates as Record<string, string>);
|
||||
predicate = options?.where;
|
||||
}
|
||||
|
||||
await this.#client.post(`${this.#tablePrefix}/update/`, {
|
||||
predicate: options?.where ?? null,
|
||||
updates: Object.entries(updates).map(([key, value]) => [key, value]),
|
||||
predicate: predicate ?? null,
|
||||
updates: columns,
|
||||
});
|
||||
}
|
||||
async countRows(filter?: unknown): Promise<number> {
|
||||
|
||||
@@ -40,6 +40,7 @@ import {
|
||||
} from "./native";
|
||||
import { Query, VectorQuery } from "./query";
|
||||
import { sanitizeTable } from "./sanitize";
|
||||
import { IntoSql, toSQL } from "./util";
|
||||
export { IndexConfig } from "./native";
|
||||
|
||||
/**
|
||||
@@ -123,6 +124,34 @@ export abstract class Table {
|
||||
* @param {Data} data Records to be inserted into the Table
|
||||
*/
|
||||
abstract add(data: Data, options?: Partial<AddDataOptions>): Promise<void>;
|
||||
/**
|
||||
* Update existing records in the Table
|
||||
* @param opts.values The values to update. The keys are the column names and the values
|
||||
* are the values to set.
|
||||
* @example
|
||||
* ```ts
|
||||
* table.update({where:"x = 2", values:{"vector": [10, 10]}})
|
||||
* ```
|
||||
*/
|
||||
abstract update(
|
||||
opts: {
|
||||
values: Map<string, IntoSql> | Record<string, IntoSql>;
|
||||
} & Partial<UpdateOptions>,
|
||||
): Promise<void>;
|
||||
/**
|
||||
* Update existing records in the Table
|
||||
* @param opts.valuesSql The values to update. The keys are the column names and the values
|
||||
* are the values to set. The values are SQL expressions.
|
||||
* @example
|
||||
* ```ts
|
||||
* table.update({where:"x = 2", valuesSql:{"x": "x + 1"}})
|
||||
* ```
|
||||
*/
|
||||
abstract update(
|
||||
opts: {
|
||||
valuesSql: Map<string, string> | Record<string, string>;
|
||||
} & Partial<UpdateOptions>,
|
||||
): Promise<void>;
|
||||
/**
|
||||
* Update existing records in the Table
|
||||
*
|
||||
@@ -152,6 +181,7 @@ export abstract class Table {
|
||||
updates: Map<string, string> | Record<string, string>,
|
||||
options?: Partial<UpdateOptions>,
|
||||
): Promise<void>;
|
||||
|
||||
/** Count the total number of rows in the dataset. */
|
||||
abstract countRows(filter?: string): Promise<number>;
|
||||
/** Delete the rows that satisfy the predicate. */
|
||||
@@ -471,17 +501,63 @@ export class LocalTable extends Table {
|
||||
}
|
||||
|
||||
async update(
|
||||
updates: Map<string, string> | Record<string, string>,
|
||||
optsOrUpdates:
|
||||
| (Map<string, string> | Record<string, string>)
|
||||
| ({
|
||||
values: Map<string, IntoSql> | Record<string, IntoSql>;
|
||||
} & Partial<UpdateOptions>)
|
||||
| ({
|
||||
valuesSql: Map<string, string> | Record<string, string>;
|
||||
} & Partial<UpdateOptions>),
|
||||
options?: Partial<UpdateOptions>,
|
||||
) {
|
||||
const onlyIf = options?.where;
|
||||
const isValues =
|
||||
"values" in optsOrUpdates && typeof optsOrUpdates.values !== "string";
|
||||
const isValuesSql =
|
||||
"valuesSql" in optsOrUpdates &&
|
||||
typeof optsOrUpdates.valuesSql !== "string";
|
||||
const isMap = (obj: unknown): obj is Map<string, string> => {
|
||||
return obj instanceof Map;
|
||||
};
|
||||
|
||||
let predicate;
|
||||
let columns: [string, string][];
|
||||
if (updates instanceof Map) {
|
||||
columns = Array.from(updates.entries());
|
||||
} else {
|
||||
columns = Object.entries(updates);
|
||||
switch (true) {
|
||||
case isMap(optsOrUpdates):
|
||||
columns = Array.from(optsOrUpdates.entries());
|
||||
predicate = options?.where;
|
||||
break;
|
||||
case isValues && isMap(optsOrUpdates.values):
|
||||
columns = Array.from(optsOrUpdates.values.entries()).map(([k, v]) => [
|
||||
k,
|
||||
toSQL(v),
|
||||
]);
|
||||
predicate = optsOrUpdates.where;
|
||||
break;
|
||||
case isValues && !isMap(optsOrUpdates.values):
|
||||
columns = Object.entries(optsOrUpdates.values).map(([k, v]) => [
|
||||
k,
|
||||
toSQL(v),
|
||||
]);
|
||||
predicate = optsOrUpdates.where;
|
||||
break;
|
||||
|
||||
case isValuesSql && isMap(optsOrUpdates.valuesSql):
|
||||
columns = Array.from(optsOrUpdates.valuesSql.entries());
|
||||
predicate = optsOrUpdates.where;
|
||||
break;
|
||||
case isValuesSql && !isMap(optsOrUpdates.valuesSql):
|
||||
columns = Object.entries(optsOrUpdates.valuesSql).map(([k, v]) => [
|
||||
k,
|
||||
v,
|
||||
]);
|
||||
predicate = optsOrUpdates.where;
|
||||
break;
|
||||
default:
|
||||
columns = Object.entries(optsOrUpdates as Record<string, string>);
|
||||
predicate = options?.where;
|
||||
}
|
||||
await this.inner.update(onlyIf, columns);
|
||||
await this.inner.update(predicate, columns);
|
||||
}
|
||||
|
||||
async countRows(filter?: string): Promise<number> {
|
||||
|
||||
@@ -1,3 +1,37 @@
|
||||
export type IntoSql =
|
||||
| string
|
||||
| number
|
||||
| boolean
|
||||
| null
|
||||
| Date
|
||||
| ArrayBufferLike
|
||||
| Buffer
|
||||
| IntoSql[];
|
||||
|
||||
export function toSQL(value: IntoSql): string {
|
||||
if (typeof value === "string") {
|
||||
return `'${value.replace(/'/g, "''")}'`;
|
||||
} else if (typeof value === "number") {
|
||||
return value.toString();
|
||||
} else if (typeof value === "boolean") {
|
||||
return value ? "TRUE" : "FALSE";
|
||||
} else if (value === null) {
|
||||
return "NULL";
|
||||
} else if (value instanceof Date) {
|
||||
return `'${value.toISOString()}'`;
|
||||
} else if (Array.isArray(value)) {
|
||||
return `[${value.map(toSQL).join(", ")}]`;
|
||||
} else if (Buffer.isBuffer(value)) {
|
||||
return `X'${value.toString("hex")}'`;
|
||||
} else if (value instanceof ArrayBuffer) {
|
||||
return `X'${Buffer.from(value).toString("hex")}'`;
|
||||
} else {
|
||||
throw new Error(
|
||||
`Unsupported value type: ${typeof value} value: (${value})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export class TTLCache {
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
private readonly cache: Map<string, { value: any; expires: number }>;
|
||||
|
||||
208
nodejs/native.d.ts
vendored
Normal file
208
nodejs/native.d.ts
vendored
Normal file
@@ -0,0 +1,208 @@
|
||||
/* tslint:disable */
|
||||
/* eslint-disable */
|
||||
|
||||
/* auto-generated by NAPI-RS */
|
||||
|
||||
/** A description of an index currently configured on a column */
|
||||
export interface IndexConfig {
|
||||
/** The name of the index */
|
||||
name: string
|
||||
/** The type of the index */
|
||||
indexType: string
|
||||
/**
|
||||
* The columns in the index
|
||||
*
|
||||
* Currently this is always an array of size 1. In the future there may
|
||||
* be more columns to represent composite indices.
|
||||
*/
|
||||
columns: Array<string>
|
||||
}
|
||||
/** Statistics about a compaction operation. */
|
||||
export interface CompactionStats {
|
||||
/** The number of fragments removed */
|
||||
fragmentsRemoved: number
|
||||
/** The number of new, compacted fragments added */
|
||||
fragmentsAdded: number
|
||||
/** The number of data files removed */
|
||||
filesRemoved: number
|
||||
/** The number of new, compacted data files added */
|
||||
filesAdded: number
|
||||
}
|
||||
/** Statistics about a cleanup operation */
|
||||
export interface RemovalStats {
|
||||
/** The number of bytes removed */
|
||||
bytesRemoved: number
|
||||
/** The number of old versions removed */
|
||||
oldVersionsRemoved: number
|
||||
}
|
||||
/** Statistics about an optimize operation */
|
||||
export interface OptimizeStats {
|
||||
/** Statistics about the compaction operation */
|
||||
compaction: CompactionStats
|
||||
/** Statistics about the removal operation */
|
||||
prune: RemovalStats
|
||||
}
|
||||
/**
|
||||
* A definition of a column alteration. The alteration changes the column at
|
||||
* `path` to have the new name `name`, to be nullable if `nullable` is true,
|
||||
* and to have the data type `data_type`. At least one of `rename` or `nullable`
|
||||
* must be provided.
|
||||
*/
|
||||
export interface ColumnAlteration {
|
||||
/**
|
||||
* The path to the column to alter. This is a dot-separated path to the column.
|
||||
* If it is a top-level column then it is just the name of the column. If it is
|
||||
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
||||
* `c` nested inside a column `b` nested inside a column `a`.
|
||||
*/
|
||||
path: string
|
||||
/**
|
||||
* The new name of the column. If not provided then the name will not be changed.
|
||||
* This must be distinct from the names of all other columns in the table.
|
||||
*/
|
||||
rename?: string
|
||||
/** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
|
||||
nullable?: boolean
|
||||
}
|
||||
/** A definition of a new column to add to a table. */
|
||||
export interface AddColumnsSql {
|
||||
/** The name of the new column. */
|
||||
name: string
|
||||
/**
|
||||
* The values to populate the new column with, as a SQL expression.
|
||||
* The expression can reference other columns in the table.
|
||||
*/
|
||||
valueSql: string
|
||||
}
|
||||
export interface IndexStatistics {
|
||||
/** The number of rows indexed by the index */
|
||||
numIndexedRows: number
|
||||
/** The number of rows not indexed */
|
||||
numUnindexedRows: number
|
||||
/** The type of the index */
|
||||
indexType?: string
|
||||
/** The metadata for each index */
|
||||
indices: Array<IndexMetadata>
|
||||
}
|
||||
export interface IndexMetadata {
|
||||
metricType?: string
|
||||
indexType?: string
|
||||
}
|
||||
export interface ConnectionOptions {
|
||||
/**
|
||||
* (For LanceDB OSS only): The interval, in seconds, at which to check for
|
||||
* updates to the table from other processes. If None, then consistency is not
|
||||
* checked. For performance reasons, this is the default. For strong
|
||||
* consistency, set this to zero seconds. Then every read will check for
|
||||
* updates from other processes. As a compromise, you can set this to a
|
||||
* non-zero value for eventual consistency. If more than that interval
|
||||
* has passed since the last check, then the table will be checked for updates.
|
||||
* Note: this consistency only applies to read operations. Write operations are
|
||||
* always consistent.
|
||||
*/
|
||||
readConsistencyInterval?: number
|
||||
/**
|
||||
* (For LanceDB OSS only): configuration for object storage.
|
||||
*
|
||||
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>
|
||||
}
|
||||
/** Write mode for writing a table. */
|
||||
export const enum WriteMode {
|
||||
Create = 'Create',
|
||||
Append = 'Append',
|
||||
Overwrite = 'Overwrite'
|
||||
}
|
||||
/** Write options when creating a Table. */
|
||||
export interface WriteOptions {
|
||||
/** Write mode for writing to a table. */
|
||||
mode?: WriteMode
|
||||
}
|
||||
export interface OpenTableOptions {
|
||||
storageOptions?: Record<string, string>
|
||||
}
|
||||
export class Connection {
|
||||
/** Create a new Connection instance from the given URI. */
|
||||
static new(uri: string, options: ConnectionOptions): Promise<Connection>
|
||||
display(): string
|
||||
isOpen(): boolean
|
||||
close(): void
|
||||
/** List all tables in the dataset. */
|
||||
tableNames(startAfter?: string | undefined | null, limit?: number | undefined | null): Promise<Array<string>>
|
||||
/**
|
||||
* Create table from a Apache Arrow IPC (file) buffer.
|
||||
*
|
||||
* Parameters:
|
||||
* - name: The name of the table.
|
||||
* - buf: The buffer containing the IPC file.
|
||||
*
|
||||
*/
|
||||
createTable(name: string, buf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
||||
createEmptyTable(name: string, schemaBuf: Buffer, mode: string, storageOptions?: Record<string, string> | undefined | null, useLegacyFormat?: boolean | undefined | null): Promise<Table>
|
||||
openTable(name: string, storageOptions?: Record<string, string> | undefined | null, indexCacheSize?: number | undefined | null): Promise<Table>
|
||||
/** Drop table with the name. Or raise an error if the table does not exist. */
|
||||
dropTable(name: string): Promise<void>
|
||||
}
|
||||
export class Index {
|
||||
static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
|
||||
static btree(): Index
|
||||
}
|
||||
/** Typescript-style Async Iterator over RecordBatches */
|
||||
export class RecordBatchIterator {
|
||||
next(): Promise<Buffer | null>
|
||||
}
|
||||
/** A builder used to create and run a merge insert operation */
|
||||
export class NativeMergeInsertBuilder {
|
||||
whenMatchedUpdateAll(condition?: string | undefined | null): NativeMergeInsertBuilder
|
||||
whenNotMatchedInsertAll(): NativeMergeInsertBuilder
|
||||
whenNotMatchedBySourceDelete(filter?: string | undefined | null): NativeMergeInsertBuilder
|
||||
execute(buf: Buffer): Promise<void>
|
||||
}
|
||||
export class Query {
|
||||
onlyIf(predicate: string): void
|
||||
select(columns: Array<[string, string]>): void
|
||||
limit(limit: number): void
|
||||
nearestTo(vector: Float32Array): VectorQuery
|
||||
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
||||
explainPlan(verbose: boolean): Promise<string>
|
||||
}
|
||||
export class VectorQuery {
|
||||
column(column: string): void
|
||||
distanceType(distanceType: string): void
|
||||
postfilter(): void
|
||||
refineFactor(refineFactor: number): void
|
||||
nprobes(nprobe: number): void
|
||||
bypassVectorIndex(): void
|
||||
onlyIf(predicate: string): void
|
||||
select(columns: Array<[string, string]>): void
|
||||
limit(limit: number): void
|
||||
execute(maxBatchLength?: number | undefined | null): Promise<RecordBatchIterator>
|
||||
explainPlan(verbose: boolean): Promise<string>
|
||||
}
|
||||
export class Table {
|
||||
name: string
|
||||
display(): string
|
||||
isOpen(): boolean
|
||||
close(): void
|
||||
/** Return Schema as empty Arrow IPC file. */
|
||||
schema(): Promise<Buffer>
|
||||
add(buf: Buffer, mode: string): Promise<void>
|
||||
countRows(filter?: string | undefined | null): Promise<number>
|
||||
delete(predicate: string): Promise<void>
|
||||
createIndex(index: Index | undefined | null, column: string, replace?: boolean | undefined | null): Promise<void>
|
||||
update(onlyIf: string | undefined | null, columns: Array<[string, string]>): Promise<void>
|
||||
query(): Query
|
||||
vectorSearch(vector: Float32Array): VectorQuery
|
||||
addColumns(transforms: Array<AddColumnsSql>): Promise<void>
|
||||
alterColumns(alterations: Array<ColumnAlteration>): Promise<void>
|
||||
dropColumns(columns: Array<string>): Promise<void>
|
||||
version(): Promise<number>
|
||||
checkout(version: number): Promise<void>
|
||||
checkoutLatest(): Promise<void>
|
||||
restore(): Promise<void>
|
||||
optimize(olderThanMs?: number | undefined | null): Promise<OptimizeStats>
|
||||
listIndices(): Promise<Array<IndexConfig>>
|
||||
indexStats(indexName: string): Promise<IndexStatistics | null>
|
||||
mergeInsert(on: Array<string>): NativeMergeInsertBuilder
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.6.0",
|
||||
"version": "0.7.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.6.0",
|
||||
"version": "0.7.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.6.0",
|
||||
"version": "0.7.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.6.0",
|
||||
"version": "0.7.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.6.0",
|
||||
"version": "0.7.0",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
"vector database",
|
||||
"ann"
|
||||
],
|
||||
"version": "0.6.0",
|
||||
"version": "0.7.0",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.9.0"
|
||||
current_version = "0.10.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.9.0"
|
||||
version = "0.10.1"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
||||
# version in Cargo.toml
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.14.0",
|
||||
"pylance==0.14.1",
|
||||
"ratelimiter~=1.0",
|
||||
"requests>=2.31.0",
|
||||
"retry>=0.9.2",
|
||||
|
||||
@@ -15,7 +15,7 @@ import importlib.metadata
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import timedelta
|
||||
from typing import Dict, Optional, Union
|
||||
from typing import Dict, Optional, Union, Any
|
||||
|
||||
__version__ = importlib.metadata.version("lancedb")
|
||||
|
||||
@@ -35,7 +35,7 @@ def connect(
|
||||
host_override: Optional[str] = None,
|
||||
read_consistency_interval: Optional[timedelta] = None,
|
||||
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
|
||||
**kwargs,
|
||||
**kwargs: Any,
|
||||
) -> DBConnection:
|
||||
"""Connect to a LanceDB database.
|
||||
|
||||
|
||||
@@ -15,8 +15,9 @@ import os
|
||||
import io
|
||||
import requests
|
||||
import base64
|
||||
import urllib.parse as urlparse
|
||||
from typing import ClassVar, List, Union, Optional, TYPE_CHECKING
|
||||
from urllib.parse import urlparse
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, ClassVar, List, Union, Optional, Any, Dict
|
||||
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
@@ -32,6 +33,14 @@ if TYPE_CHECKING:
|
||||
API_URL = "https://api.jina.ai/v1/embeddings"
|
||||
|
||||
|
||||
def is_valid_url(text):
|
||||
try:
|
||||
parsed = urlparse(text)
|
||||
return bool(parsed.scheme) and bool(parsed.netloc)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
@register("jina")
|
||||
class JinaEmbeddings(EmbeddingFunction):
|
||||
"""
|
||||
@@ -58,67 +67,35 @@ class JinaEmbeddings(EmbeddingFunction):
|
||||
# TODO: fix hardcoding
|
||||
return 768
|
||||
|
||||
def sanitize_input(self, inputs: IMAGES) -> Union[List[bytes], np.ndarray]:
|
||||
def sanitize_input(
|
||||
self, inputs: Union[TEXT, IMAGES]
|
||||
) -> Union[List[Any], np.ndarray]:
|
||||
"""
|
||||
Sanitize the input to the embedding function.
|
||||
"""
|
||||
if isinstance(inputs, (str, bytes)):
|
||||
if isinstance(inputs, (str, bytes, Path)):
|
||||
inputs = [inputs]
|
||||
elif isinstance(inputs, pa.Array):
|
||||
inputs = inputs.to_pylist()
|
||||
elif isinstance(inputs, pa.ChunkedArray):
|
||||
inputs = inputs.combine_chunks().to_pylist()
|
||||
else:
|
||||
if isinstance(inputs, list):
|
||||
inputs = inputs
|
||||
else:
|
||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
||||
if isinstance(inputs, PIL.Image.Image):
|
||||
inputs = [inputs]
|
||||
return inputs
|
||||
|
||||
def compute_query_embeddings(
|
||||
self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
|
||||
) -> List[np.ndarray]:
|
||||
"""
|
||||
Compute the embeddings for a given user query
|
||||
|
||||
Parameters
|
||||
----------
|
||||
query : Union[str, PIL.Image.Image]
|
||||
The query to embed. A query can be either text or an image.
|
||||
"""
|
||||
if isinstance(query, str):
|
||||
return self.generate_text_embeddings([query])
|
||||
else:
|
||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
||||
if isinstance(query, PIL.Image.Image):
|
||||
return [self.generate_image_embedding(query)]
|
||||
else:
|
||||
raise TypeError(
|
||||
"JinaEmbeddingFunction supports str or PIL Image as query"
|
||||
)
|
||||
|
||||
def compute_source_embeddings(self, texts: TEXT, *args, **kwargs) -> List[np.array]:
|
||||
texts = self.sanitize_input(texts)
|
||||
return self.generate_text_embeddings(texts)
|
||||
|
||||
def generate_image_embedding(
|
||||
self, image: Union[str, bytes, "PIL.Image.Image"]
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Generate the embedding for a single image
|
||||
|
||||
Parameters
|
||||
----------
|
||||
image : Union[str, bytes, PIL.Image.Image]
|
||||
The image to embed. If the image is a str, it is treated as a uri.
|
||||
If the image is bytes, it is treated as the raw image bytes.
|
||||
"""
|
||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
||||
@staticmethod
|
||||
def _generate_image_input_dict(image: Union[str, bytes, "PIL.Image.Image"]) -> Dict:
|
||||
if isinstance(image, bytes):
|
||||
image = {"image": base64.b64encode(image).decode("utf-8")}
|
||||
if isinstance(image, PIL.Image.Image):
|
||||
buffered = io.BytesIO()
|
||||
image.save(buffered, format="PNG")
|
||||
image_bytes = buffered.getvalue()
|
||||
image = {"image": base64.b64encode(image_bytes).decode("utf-8")}
|
||||
elif isinstance(image, str):
|
||||
image_dict = {"image": base64.b64encode(image).decode("utf-8")}
|
||||
elif isinstance(image, (str, Path)):
|
||||
parsed = urlparse.urlparse(image)
|
||||
# TODO handle drive letter on windows.
|
||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
||||
if parsed.scheme == "file":
|
||||
pil_image = PIL.Image.open(parsed.path)
|
||||
elif parsed.scheme == "":
|
||||
@@ -130,8 +107,95 @@ class JinaEmbeddings(EmbeddingFunction):
|
||||
buffered = io.BytesIO()
|
||||
pil_image.save(buffered, format="PNG")
|
||||
image_bytes = buffered.getvalue()
|
||||
image = {"image": base64.b64encode(image_bytes).decode("utf-8")}
|
||||
return self._generate_embeddings(input=[image])[0]
|
||||
image_dict = {"image": base64.b64encode(image_bytes).decode("utf-8")}
|
||||
else:
|
||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
||||
|
||||
if isinstance(image, PIL.Image.Image):
|
||||
buffered = io.BytesIO()
|
||||
image.save(buffered, format="PNG")
|
||||
image_bytes = buffered.getvalue()
|
||||
image_dict = {"image": base64.b64encode(image_bytes).decode("utf-8")}
|
||||
else:
|
||||
raise TypeError(
|
||||
f"JinaEmbeddingFunction supports str, Path, bytes or PIL Image"
|
||||
f" as query, but {type(image)} is given"
|
||||
)
|
||||
return image_dict
|
||||
|
||||
def compute_query_embeddings(
|
||||
self, query: Union[str, bytes, "Path", "PIL.Image.Image"], *args, **kwargs
|
||||
) -> List[np.ndarray]:
|
||||
"""
|
||||
Compute the embeddings for a given user query
|
||||
|
||||
Parameters
|
||||
----------
|
||||
query : Union[str, PIL.Image.Image]
|
||||
The query to embed. A query can be either text or an image.
|
||||
"""
|
||||
if isinstance(query, str):
|
||||
if not is_valid_url(query):
|
||||
return self.generate_text_embeddings([query])
|
||||
else:
|
||||
return [self.generate_image_embedding(query)]
|
||||
elif isinstance(query, (Path, bytes)):
|
||||
return [self.generate_image_embedding(query)]
|
||||
else:
|
||||
PIL = attempt_import_or_raise("PIL", "pillow")
|
||||
|
||||
if isinstance(query, PIL.Image.Image):
|
||||
return [self.generate_image_embedding(query)]
|
||||
else:
|
||||
raise TypeError(
|
||||
f"JinaEmbeddingFunction supports str, Path, bytes or PIL Image"
|
||||
f" as query, but {type(query)} is given"
|
||||
)
|
||||
|
||||
def compute_source_embeddings(
|
||||
self, inputs: Union[TEXT, IMAGES], *args, **kwargs
|
||||
) -> List[np.array]:
|
||||
inputs = self.sanitize_input(inputs)
|
||||
model_inputs = []
|
||||
image_inputs = 0
|
||||
|
||||
def process_input(input, model_inputs, image_inputs):
|
||||
if isinstance(input, str):
|
||||
if not is_valid_url(input):
|
||||
model_inputs.append({"text": input})
|
||||
else:
|
||||
image_inputs += 1
|
||||
model_inputs.append(self._generate_image_input_dict(input))
|
||||
elif isinstance(input, list):
|
||||
for _input in input:
|
||||
image_inputs = process_input(_input, model_inputs, image_inputs)
|
||||
else:
|
||||
image_inputs += 1
|
||||
model_inputs.append(self._generate_image_input_dict(input))
|
||||
return image_inputs
|
||||
|
||||
for input in inputs:
|
||||
image_inputs = process_input(input, model_inputs, image_inputs)
|
||||
|
||||
if image_inputs > 0:
|
||||
return self._generate_embeddings(model_inputs)
|
||||
else:
|
||||
return self.generate_text_embeddings(inputs)
|
||||
|
||||
def generate_image_embedding(
|
||||
self, image: Union[str, bytes, Path, "PIL.Image.Image"]
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Generate the embedding for a single image
|
||||
|
||||
Parameters
|
||||
----------
|
||||
image : Union[str, bytes, PIL.Image.Image]
|
||||
The image to embed. If the image is a str, it is treated as a uri.
|
||||
If the image is bytes, it is treated as the raw image bytes.
|
||||
"""
|
||||
image_dict = self._generate_image_input_dict(image)
|
||||
return self._generate_embeddings(input=[image_dict])[0]
|
||||
|
||||
def generate_text_embeddings(
|
||||
self, texts: Union[List[str], np.ndarray], *args, **kwargs
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
from typing import List
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
from .base import Reranker
|
||||
@@ -112,6 +110,6 @@ class LinearCombinationReranker(Reranker):
|
||||
# these scores represent distance
|
||||
return 1 - (self.weight * score1 + (1 - self.weight) * score2)
|
||||
|
||||
def _invert_score(self, scores: List[float]):
|
||||
# Invert the scores between relevance and distance
|
||||
return 1 - scores
|
||||
def _invert_score(self, score: float):
|
||||
# Invert the score between relevance and distance
|
||||
return 1 - score
|
||||
|
||||
@@ -1,16 +1,7 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The Lance Authors
|
||||
|
||||
import binascii
|
||||
import functools
|
||||
import importlib
|
||||
import os
|
||||
@@ -231,6 +222,15 @@ def _(value: str):
|
||||
return f"'{value}'"
|
||||
|
||||
|
||||
@value_to_sql.register(bytes)
|
||||
def _(value: bytes):
|
||||
"""Convert bytes to a hex string literal.
|
||||
|
||||
See https://datafusion.apache.org/user-guide/sql/data_types.html#binary-types
|
||||
"""
|
||||
return f"X'{binascii.hexlify(value).decode()}'"
|
||||
|
||||
|
||||
@value_to_sql.register(int)
|
||||
def _(value: int):
|
||||
return str(value)
|
||||
|
||||
@@ -1,15 +1,5 @@
|
||||
# Copyright 2023 LanceDB Developers
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The Lance Authors
|
||||
|
||||
import functools
|
||||
from copy import copy
|
||||
@@ -499,6 +489,7 @@ def test_update_types(db):
|
||||
"date": date(2021, 1, 1),
|
||||
"vector1": [1.0, 0.0],
|
||||
"vector2": [1.0, 1.0],
|
||||
"binary": b"abc",
|
||||
}
|
||||
],
|
||||
)
|
||||
@@ -512,6 +503,7 @@ def test_update_types(db):
|
||||
date="DATE '2021-01-02'",
|
||||
vector1="[2.0, 2.0]",
|
||||
vector2="[3.0, 3.0]",
|
||||
binary="X'646566'",
|
||||
)
|
||||
)
|
||||
actual = table.to_arrow().to_pylist()[0]
|
||||
@@ -523,6 +515,7 @@ def test_update_types(db):
|
||||
date=date(2021, 1, 2),
|
||||
vector1=[2.0, 2.0],
|
||||
vector2=[3.0, 3.0],
|
||||
binary=b"def",
|
||||
)
|
||||
assert actual == expected
|
||||
|
||||
@@ -536,6 +529,7 @@ def test_update_types(db):
|
||||
date=date(2021, 1, 3),
|
||||
vector1=[3.0, 3.0],
|
||||
vector2=np.array([4.0, 4.0]),
|
||||
binary=b"def",
|
||||
)
|
||||
)
|
||||
actual = table.to_arrow().to_pylist()[0]
|
||||
@@ -547,6 +541,7 @@ def test_update_types(db):
|
||||
date=date(2021, 1, 3),
|
||||
vector1=[3.0, 3.0],
|
||||
vector2=[4.0, 4.0],
|
||||
binary=b"def",
|
||||
)
|
||||
assert actual == expected
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-node"
|
||||
version = "0.6.0"
|
||||
version = "0.7.0"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
edition.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.6.0"
|
||||
version = "0.7.0"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
// --8<-- [start:imports]
|
||||
|
||||
use std::{iter::once, sync::Arc};
|
||||
|
||||
use arrow_array::{Float64Array, Int32Array, RecordBatch, RecordBatchIterator, StringArray};
|
||||
@@ -11,6 +13,9 @@ use lancedb::{
|
||||
Result,
|
||||
};
|
||||
|
||||
// --8<-- [end:imports]
|
||||
|
||||
// --8<-- [start:openai_embeddings]
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let tempdir = tempfile::tempdir().unwrap();
|
||||
@@ -35,7 +40,6 @@ async fn main() -> Result<()> {
|
||||
.execute()
|
||||
.await?;
|
||||
|
||||
// there is no equivalent to '.search(<query>)' yet
|
||||
let query = Arc::new(StringArray::from_iter_values(once("something warm")));
|
||||
let query_vector = embedding.compute_query_embeddings(query)?;
|
||||
let mut results = table
|
||||
@@ -53,9 +57,9 @@ async fn main() -> Result<()> {
|
||||
.unwrap();
|
||||
let text = out.iter().next().unwrap().unwrap();
|
||||
println!("Closest match: {}", text);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
// --8<-- [end:openai_embeddings]
|
||||
|
||||
fn make_data() -> impl IntoArrow {
|
||||
let schema = Schema::new(vec![
|
||||
|
||||
@@ -35,7 +35,6 @@ use lance::dataset::{
|
||||
Dataset, UpdateBuilder as LanceUpdateBuilder, WhenMatched, WriteMode, WriteParams,
|
||||
};
|
||||
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
|
||||
use lance::index::scalar::ScalarIndexType;
|
||||
use lance::io::WrappingObjectStore;
|
||||
use lance_datafusion::exec::execute_plan;
|
||||
use lance_index::vector::hnsw::builder::HnswBuildParams;
|
||||
@@ -1505,7 +1504,7 @@ impl NativeTable {
|
||||
|
||||
let mut dataset = self.dataset.get_mut().await?;
|
||||
let lance_idx_params = lance::index::scalar::ScalarIndexParams {
|
||||
force_index_type: Some(ScalarIndexType::BTree),
|
||||
force_index_type: Some(lance::index::scalar::ScalarIndexType::BTree),
|
||||
};
|
||||
dataset
|
||||
.create_index(
|
||||
|
||||
Reference in New Issue
Block a user