Compare commits

..

1 Commits

Author SHA1 Message Date
albertlockett
3228fb9cd9 test 2024-10-08 18:28:02 -04:00
86 changed files with 1830 additions and 3355 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.11.1-beta.0"
current_version = "0.11.0-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.
@@ -66,32 +66,6 @@ glob = "nodejs/npm/*/package.json"
replace = "\"version\": \"{new_version}\","
search = "\"version\": \"{current_version}\","
# vectordb node binary packages
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-darwin-arm64\": \"{new_version}\""
search = "\"@lancedb/vectordb-darwin-arm64\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-darwin-x64\": \"{new_version}\""
search = "\"@lancedb/vectordb-darwin-x64\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{new_version}\""
search = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
# Cargo files
# ------------
[[tool.bumpversion.files]]
@@ -103,8 +77,3 @@ search = "\nversion = \"{current_version}\""
filename = "rust/lancedb/Cargo.toml"
replace = "\nversion = \"{new_version}\""
search = "\nversion = \"{current_version}\""
[[tool.bumpversion.files]]
filename = "nodejs/Cargo.toml"
replace = "\nversion = \"{new_version}\""
search = "\nversion = \"{current_version}\""

View File

@@ -20,15 +20,13 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
[workspace.dependencies]
lance = { "version" = "=0.18.3", "features" = [
"dynamodb",
], git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-index = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-linalg = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-table = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-testing = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-datafusion = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance-encoding = { "version" = "=0.18.3", git = "https://github.com/lancedb/lance.git", tag = "v0.18.3-beta.2" }
lance = { "version" = "=0.18.0", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.18.0" }
lance-linalg = { "version" = "=0.18.0" }
lance-table = { "version" = "=0.18.0" }
lance-testing = { "version" = "=0.18.0" }
lance-datafusion = { "version" = "=0.18.0" }
lance-encoding = { "version" = "=0.18.0" }
# Note that this one does not include pyarrow
arrow = { version = "52.2", optional = false }
arrow-array = "52.2"
@@ -40,8 +38,8 @@ arrow-arith = "52.2"
arrow-cast = "52.2"
async-trait = "0"
chrono = "0.4.35"
datafusion-common = "41.0"
datafusion-physical-plan = "41.0"
datafusion-common = "40.0"
datafusion-physical-plan = "40.0"
half = { "version" = "=2.4.1", default-features = false, features = [
"num-traits",
] }

View File

@@ -90,9 +90,6 @@ markdown_extensions:
- pymdownx.emoji:
emoji_index: !!python/name:material.extensions.emoji.twemoji
emoji_generator: !!python/name:material.extensions.emoji.to_svg
- markdown.extensions.toc:
baselevel: 1
permalink: ""
nav:
- Home:

View File

@@ -498,7 +498,7 @@ This can also be done with the ``AWS_ENDPOINT`` and ``AWS_DEFAULT_REGION`` envir
#### S3 Express
LanceDB supports [S3 Express One Zone](https://aws.amazon.com/s3/storage-classes/express-one-zone/) endpoints, but requires additional infrastructure configuration for the compute service, such as EC2 or Lambda. Please refer to [Networking requirements for S3 Express One Zone](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-networking.html).
LanceDB supports [S3 Express One Zone](https://aws.amazon.com/s3/storage-classes/express-one-zone/) endpoints, but requires additional configuration. Also, S3 Express endpoints only support connecting from an EC2 instance within the same region.
To configure LanceDB to use an S3 Express endpoint, you must set the storage option `s3_express`. The bucket name in your table URI should **include the suffix**.

View File

@@ -41,6 +41,7 @@ To build everything fresh:
```bash
npm install
npm run tsc
npm run build
```
@@ -50,6 +51,18 @@ Then you should be able to run the tests with:
npm test
```
### Rebuilding Rust library
```bash
npm run build
```
### Rebuilding Typescript
```bash
npm run tsc
```
### Fix lints
To run the linter and have it automatically fix all errors:

View File

@@ -38,4 +38,4 @@ A [WriteMode](../enums/WriteMode.md) to use on this operation
#### Defined in
[index.ts:1359](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1359)
[index.ts:1019](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1019)

View File

@@ -30,7 +30,6 @@ A connection to a LanceDB database.
- [dropTable](LocalConnection.md#droptable)
- [openTable](LocalConnection.md#opentable)
- [tableNames](LocalConnection.md#tablenames)
- [withMiddleware](LocalConnection.md#withmiddleware)
## Constructors
@@ -47,7 +46,7 @@ A connection to a LanceDB database.
#### Defined in
[index.ts:739](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L739)
[index.ts:489](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L489)
## Properties
@@ -57,7 +56,7 @@ A connection to a LanceDB database.
#### Defined in
[index.ts:737](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L737)
[index.ts:487](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L487)
___
@@ -75,7 +74,7 @@ ___
#### Defined in
[index.ts:736](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L736)
[index.ts:486](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L486)
## Accessors
@@ -93,7 +92,7 @@ ___
#### Defined in
[index.ts:744](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L744)
[index.ts:494](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L494)
## Methods
@@ -114,7 +113,7 @@ Creates a new Table, optionally initializing it with new data.
| Name | Type |
| :------ | :------ |
| `name` | `string` \| [`CreateTableOptions`](../interfaces/CreateTableOptions.md)\<`T`\> |
| `data?` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] |
| `data?` | `Record`\<`string`, `unknown`\>[] |
| `optsOrEmbedding?` | [`WriteOptions`](../interfaces/WriteOptions.md) \| [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)\<`T`\> |
| `opt?` | [`WriteOptions`](../interfaces/WriteOptions.md) |
@@ -128,7 +127,7 @@ Creates a new Table, optionally initializing it with new data.
#### Defined in
[index.ts:788](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L788)
[index.ts:542](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L542)
___
@@ -159,7 +158,7 @@ ___
#### Defined in
[index.ts:822](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L822)
[index.ts:576](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L576)
___
@@ -185,7 +184,7 @@ Drop an existing table.
#### Defined in
[index.ts:876](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L876)
[index.ts:630](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L630)
___
@@ -211,7 +210,7 @@ Open a table in the database.
#### Defined in
[index.ts:760](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L760)
[index.ts:510](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L510)
**openTable**\<`T`\>(`name`, `embeddings`): `Promise`\<[`Table`](../interfaces/Table.md)\<`T`\>\>
@@ -240,7 +239,7 @@ Connection.openTable
#### Defined in
[index.ts:768](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L768)
[index.ts:518](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L518)
**openTable**\<`T`\>(`name`, `embeddings?`): `Promise`\<[`Table`](../interfaces/Table.md)\<`T`\>\>
@@ -267,7 +266,7 @@ Connection.openTable
#### Defined in
[index.ts:772](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L772)
[index.ts:522](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L522)
___
@@ -287,36 +286,4 @@ Get the names of all tables in the database.
#### Defined in
[index.ts:751](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L751)
___
### withMiddleware
**withMiddleware**(`middleware`): [`Connection`](../interfaces/Connection.md)
Instrument the behavior of this Connection with middleware.
The middleware will be called in the order they are added.
Currently this functionality is only supported for remote Connections.
#### Parameters
| Name | Type |
| :------ | :------ |
| `middleware` | `HttpMiddleware` |
#### Returns
[`Connection`](../interfaces/Connection.md)
- this Connection instrumented by the passed middleware
#### Implementation of
[Connection](../interfaces/Connection.md).[withMiddleware](../interfaces/Connection.md#withmiddleware)
#### Defined in
[index.ts:880](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L880)
[index.ts:501](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L501)

View File

@@ -37,8 +37,6 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
### Methods
- [add](LocalTable.md#add)
- [addColumns](LocalTable.md#addcolumns)
- [alterColumns](LocalTable.md#altercolumns)
- [checkElectron](LocalTable.md#checkelectron)
- [cleanupOldVersions](LocalTable.md#cleanupoldversions)
- [compactFiles](LocalTable.md#compactfiles)
@@ -46,16 +44,13 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
- [createIndex](LocalTable.md#createindex)
- [createScalarIndex](LocalTable.md#createscalarindex)
- [delete](LocalTable.md#delete)
- [dropColumns](LocalTable.md#dropcolumns)
- [filter](LocalTable.md#filter)
- [getSchema](LocalTable.md#getschema)
- [indexStats](LocalTable.md#indexstats)
- [listIndices](LocalTable.md#listindices)
- [mergeInsert](LocalTable.md#mergeinsert)
- [overwrite](LocalTable.md#overwrite)
- [search](LocalTable.md#search)
- [update](LocalTable.md#update)
- [withMiddleware](LocalTable.md#withmiddleware)
## Constructors
@@ -79,7 +74,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
#### Defined in
[index.ts:892](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L892)
[index.ts:642](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L642)
**new LocalTable**\<`T`\>(`tbl`, `name`, `options`, `embeddings`)
@@ -100,7 +95,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
#### Defined in
[index.ts:899](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L899)
[index.ts:649](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L649)
## Properties
@@ -110,7 +105,7 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
#### Defined in
[index.ts:889](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L889)
[index.ts:639](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L639)
___
@@ -120,7 +115,7 @@ ___
#### Defined in
[index.ts:888](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L888)
[index.ts:638](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L638)
___
@@ -130,7 +125,7 @@ ___
#### Defined in
[index.ts:887](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L887)
[index.ts:637](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L637)
___
@@ -148,7 +143,7 @@ ___
#### Defined in
[index.ts:890](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L890)
[index.ts:640](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L640)
___
@@ -158,7 +153,7 @@ ___
#### Defined in
[index.ts:886](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L886)
[index.ts:636](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L636)
___
@@ -184,7 +179,7 @@ Creates a filter query to find all rows matching the specified criteria
#### Defined in
[index.ts:938](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L938)
[index.ts:688](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L688)
## Accessors
@@ -202,7 +197,7 @@ Creates a filter query to find all rows matching the specified criteria
#### Defined in
[index.ts:918](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L918)
[index.ts:668](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L668)
___
@@ -220,7 +215,7 @@ ___
#### Defined in
[index.ts:1171](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1171)
[index.ts:849](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L849)
## Methods
@@ -234,7 +229,7 @@ Insert records into this Table.
| Name | Type | Description |
| :------ | :------ | :------ |
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
| `data` | `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
#### Returns
@@ -248,59 +243,7 @@ The number of rows added to the table
#### Defined in
[index.ts:946](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L946)
___
### addColumns
**addColumns**(`newColumnTransforms`): `Promise`\<`void`\>
Add new columns with defined values.
#### Parameters
| Name | Type | Description |
| :------ | :------ | :------ |
| `newColumnTransforms` | \{ `name`: `string` ; `valueSql`: `string` }[] | pairs of column names and the SQL expression to use to calculate the value of the new column. These expressions will be evaluated for each row in the table, and can reference existing columns in the table. |
#### Returns
`Promise`\<`void`\>
#### Implementation of
[Table](../interfaces/Table.md).[addColumns](../interfaces/Table.md#addcolumns)
#### Defined in
[index.ts:1195](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1195)
___
### alterColumns
**alterColumns**(`columnAlterations`): `Promise`\<`void`\>
Alter the name or nullability of columns.
#### Parameters
| Name | Type | Description |
| :------ | :------ | :------ |
| `columnAlterations` | [`ColumnAlteration`](../interfaces/ColumnAlteration.md)[] | One or more alterations to apply to columns. |
#### Returns
`Promise`\<`void`\>
#### Implementation of
[Table](../interfaces/Table.md).[alterColumns](../interfaces/Table.md#altercolumns)
#### Defined in
[index.ts:1201](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1201)
[index.ts:696](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L696)
___
@@ -314,7 +257,7 @@ ___
#### Defined in
[index.ts:1183](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1183)
[index.ts:861](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L861)
___
@@ -337,7 +280,7 @@ Clean up old versions of the table, freeing disk space.
#### Defined in
[index.ts:1130](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1130)
[index.ts:808](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L808)
___
@@ -364,22 +307,16 @@ Metrics about the compaction operation.
#### Defined in
[index.ts:1153](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1153)
[index.ts:831](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L831)
___
### countRows
**countRows**(`filter?`): `Promise`\<`number`\>
**countRows**(): `Promise`\<`number`\>
Returns the number of rows in this table.
#### Parameters
| Name | Type |
| :------ | :------ |
| `filter?` | `string` |
#### Returns
`Promise`\<`number`\>
@@ -390,7 +327,7 @@ Returns the number of rows in this table.
#### Defined in
[index.ts:1021](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1021)
[index.ts:749](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L749)
___
@@ -420,13 +357,13 @@ VectorIndexParams.
#### Defined in
[index.ts:1003](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1003)
[index.ts:734](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L734)
___
### createScalarIndex
**createScalarIndex**(`column`, `replace?`): `Promise`\<`void`\>
**createScalarIndex**(`column`, `replace`): `Promise`\<`void`\>
Create a scalar index on this Table for the given column
@@ -435,7 +372,7 @@ Create a scalar index on this Table for the given column
| Name | Type | Description |
| :------ | :------ | :------ |
| `column` | `string` | The column to index |
| `replace?` | `boolean` | If false, fail if an index already exists on the column it is always set to true for remote connections Scalar indices, like vector indices, can be used to speed up scans. A scalar index can speed up scans that contain filter expressions on the indexed column. For example, the following scan will be faster if the column `my_col` has a scalar index: ```ts const con = await lancedb.connect('./.lancedb'); const table = await con.openTable('images'); const results = await table.where('my_col = 7').execute(); ``` Scalar indices can also speed up scans containing a vector search and a prefilter: ```ts const con = await lancedb.connect('././lancedb'); const table = await con.openTable('images'); const results = await table.search([1.0, 2.0]).where('my_col != 7').prefilter(true); ``` Scalar indices can only speed up scans for basic filters using equality, comparison, range (e.g. `my_col BETWEEN 0 AND 100`), and set membership (e.g. `my_col IN (0, 1, 2)`) Scalar indices can be used if the filter contains multiple indexed columns and the filter criteria are AND'd or OR'd together (e.g. `my_col < 0 AND other_col> 100`) Scalar indices may be used if the filter contains non-indexed columns but, depending on the structure of the filter, they may not be usable. For example, if the column `not_indexed` does not have a scalar index then the filter `my_col = 0 OR not_indexed = 1` will not be able to use any scalar index on `my_col`. |
| `replace` | `boolean` | If false, fail if an index already exists on the column Scalar indices, like vector indices, can be used to speed up scans. A scalar index can speed up scans that contain filter expressions on the indexed column. For example, the following scan will be faster if the column `my_col` has a scalar index: ```ts const con = await lancedb.connect('./.lancedb'); const table = await con.openTable('images'); const results = await table.where('my_col = 7').execute(); ``` Scalar indices can also speed up scans containing a vector search and a prefilter: ```ts const con = await lancedb.connect('././lancedb'); const table = await con.openTable('images'); const results = await table.search([1.0, 2.0]).where('my_col != 7').prefilter(true); ``` Scalar indices can only speed up scans for basic filters using equality, comparison, range (e.g. `my_col BETWEEN 0 AND 100`), and set membership (e.g. `my_col IN (0, 1, 2)`) Scalar indices can be used if the filter contains multiple indexed columns and the filter criteria are AND'd or OR'd together (e.g. `my_col < 0 AND other_col> 100`) Scalar indices may be used if the filter contains non-indexed columns but, depending on the structure of the filter, they may not be usable. For example, if the column `not_indexed` does not have a scalar index then the filter `my_col = 0 OR not_indexed = 1` will not be able to use any scalar index on `my_col`. |
#### Returns
@@ -455,7 +392,7 @@ await table.createScalarIndex('my_col')
#### Defined in
[index.ts:1011](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1011)
[index.ts:742](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L742)
___
@@ -481,38 +418,7 @@ Delete rows from this table.
#### Defined in
[index.ts:1030](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1030)
___
### dropColumns
▸ **dropColumns**(`columnNames`): `Promise`\<`void`\>
Drop one or more columns from the dataset
This is a metadata-only operation and does not remove the data from the
underlying storage. In order to remove the data, you must subsequently
call ``compact_files`` to rewrite the data without the removed columns and
then call ``cleanup_files`` to remove the old files.
#### Parameters
| Name | Type | Description |
| :------ | :------ | :------ |
| `columnNames` | `string`[] | The names of the columns to drop. These can be nested column references (e.g. "a.b.c") or top-level column names (e.g. "a"). |
#### Returns
`Promise`\<`void`\>
#### Implementation of
[Table](../interfaces/Table.md).[dropColumns](../interfaces/Table.md#dropcolumns)
#### Defined in
[index.ts:1205](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1205)
[index.ts:758](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L758)
___
@@ -532,13 +438,9 @@ Creates a filter query to find all rows matching the specified criteria
[`Query`](Query.md)\<`T`\>
#### Implementation of
[Table](../interfaces/Table.md).[filter](../interfaces/Table.md#filter)
#### Defined in
[index.ts:934](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L934)
[index.ts:684](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L684)
___
@@ -552,13 +454,13 @@ ___
#### Defined in
[index.ts:1176](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1176)
[index.ts:854](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L854)
___
### indexStats
▸ **indexStats**(`indexName`): `Promise`\<[`IndexStats`](../interfaces/IndexStats.md)\>
▸ **indexStats**(`indexUuid`): `Promise`\<[`IndexStats`](../interfaces/IndexStats.md)\>
Get statistics about an index.
@@ -566,7 +468,7 @@ Get statistics about an index.
| Name | Type |
| :------ | :------ |
| `indexName` | `string` |
| `indexUuid` | `string` |
#### Returns
@@ -578,7 +480,7 @@ Get statistics about an index.
#### Defined in
[index.ts:1167](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1167)
[index.ts:845](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L845)
___
@@ -598,57 +500,7 @@ List the indicies on this table.
#### Defined in
[index.ts:1163](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1163)
___
### mergeInsert
▸ **mergeInsert**(`on`, `data`, `args`): `Promise`\<`void`\>
Runs a "merge insert" operation on the table
This operation can add rows, update rows, and remove rows all in a single
transaction. It is a very generic tool that can be used to create
behaviors like "insert if not exists", "update or insert (i.e. upsert)",
or even replace a portion of existing data with new data (e.g. replace
all data where month="january").
The merge insert operation works by combining new data from a
**source table** with existing data in a **target table** by using a
join. There are three categories of records.
"Matched" records are records that exist in both the source table and
the target table. "Not matched" records exist only in the source table
(e.g. these are new data). "Not matched by source" records exist only
in the target table (this is old data).
The MergeInsertArgs can be used to customize what should happen for
each category of data.
Please note that the data may appear to be reordered as part of this
operation. This is because updated rows will be deleted from the
dataset and then reinserted at the end with the new values.
#### Parameters
| Name | Type | Description |
| :------ | :------ | :------ |
| `on` | `string` | a column to join on. This is how records from the source table and target table are matched. |
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | the new data to insert |
| `args` | [`MergeInsertArgs`](../interfaces/MergeInsertArgs.md) | parameters controlling how the operation should behave |
#### Returns
`Promise`\<`void`\>
#### Implementation of
[Table](../interfaces/Table.md).[mergeInsert](../interfaces/Table.md#mergeinsert)
#### Defined in
[index.ts:1065](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1065)
[index.ts:841](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L841)
___
@@ -662,7 +514,7 @@ Insert records into this Table, replacing its contents.
| Name | Type | Description |
| :------ | :------ | :------ |
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
| `data` | `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
#### Returns
@@ -676,7 +528,7 @@ The number of rows added to the table
#### Defined in
[index.ts:977](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L977)
[index.ts:716](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L716)
___
@@ -702,7 +554,7 @@ Creates a search query to find the nearest neighbors of the given search term
#### Defined in
[index.ts:926](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L926)
[index.ts:676](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L676)
___
@@ -728,36 +580,4 @@ Update rows in this table.
#### Defined in
[index.ts:1043](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1043)
___
### withMiddleware
▸ **withMiddleware**(`middleware`): [`Table`](../interfaces/Table.md)\<`T`\>
Instrument the behavior of this Table with middleware.
The middleware will be called in the order they are added.
Currently this functionality is only supported for remote tables.
#### Parameters
| Name | Type |
| :------ | :------ |
| `middleware` | `HttpMiddleware` |
#### Returns
[`Table`](../interfaces/Table.md)\<`T`\>
- this Table instrumented by the passed middleware
#### Implementation of
[Table](../interfaces/Table.md).[withMiddleware](../interfaces/Table.md#withmiddleware)
#### Defined in
[index.ts:1209](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1209)
[index.ts:771](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L771)

View File

@@ -1,82 +0,0 @@
[vectordb](../README.md) / [Exports](../modules.md) / MakeArrowTableOptions
# Class: MakeArrowTableOptions
Options to control the makeArrowTable call.
## Table of contents
### Constructors
- [constructor](MakeArrowTableOptions.md#constructor)
### Properties
- [dictionaryEncodeStrings](MakeArrowTableOptions.md#dictionaryencodestrings)
- [embeddings](MakeArrowTableOptions.md#embeddings)
- [schema](MakeArrowTableOptions.md#schema)
- [vectorColumns](MakeArrowTableOptions.md#vectorcolumns)
## Constructors
### constructor
**new MakeArrowTableOptions**(`values?`)
#### Parameters
| Name | Type |
| :------ | :------ |
| `values?` | `Partial`\<[`MakeArrowTableOptions`](MakeArrowTableOptions.md)\> |
#### Defined in
[arrow.ts:98](https://github.com/lancedb/lancedb/blob/92179835/node/src/arrow.ts#L98)
## Properties
### dictionaryEncodeStrings
**dictionaryEncodeStrings**: `boolean` = `false`
If true then string columns will be encoded with dictionary encoding
Set this to true if your string columns tend to repeat the same values
often. For more precise control use the `schema` property to specify the
data type for individual columns.
If `schema` is provided then this property is ignored.
#### Defined in
[arrow.ts:96](https://github.com/lancedb/lancedb/blob/92179835/node/src/arrow.ts#L96)
___
### embeddings
`Optional` **embeddings**: [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)\<`any`\>
#### Defined in
[arrow.ts:85](https://github.com/lancedb/lancedb/blob/92179835/node/src/arrow.ts#L85)
___
### schema
`Optional` **schema**: `Schema`\<`any`\>
#### Defined in
[arrow.ts:63](https://github.com/lancedb/lancedb/blob/92179835/node/src/arrow.ts#L63)
___
### vectorColumns
**vectorColumns**: `Record`\<`string`, `VectorColumnOptions`\>
#### Defined in
[arrow.ts:81](https://github.com/lancedb/lancedb/blob/92179835/node/src/arrow.ts#L81)

View File

@@ -40,7 +40,7 @@ An embedding function that automatically creates vector representation for a giv
#### Defined in
[embedding/openai.ts:22](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/openai.ts#L22)
[embedding/openai.ts:21](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/openai.ts#L21)
## Properties
@@ -50,17 +50,17 @@ An embedding function that automatically creates vector representation for a giv
#### Defined in
[embedding/openai.ts:20](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/openai.ts#L20)
[embedding/openai.ts:19](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/openai.ts#L19)
___
### \_openai
`Private` `Readonly` **\_openai**: `OpenAI`
`Private` `Readonly` **\_openai**: `any`
#### Defined in
[embedding/openai.ts:19](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/openai.ts#L19)
[embedding/openai.ts:18](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/openai.ts#L18)
___
@@ -76,7 +76,7 @@ The name of the column that will be used as input for the Embedding Function.
#### Defined in
[embedding/openai.ts:56](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/openai.ts#L56)
[embedding/openai.ts:50](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/openai.ts#L50)
## Methods
@@ -102,4 +102,4 @@ Creates a vector representation for the given values.
#### Defined in
[embedding/openai.ts:43](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/openai.ts#L43)
[embedding/openai.ts:38](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/openai.ts#L38)

View File

@@ -19,7 +19,6 @@ A builder for nearest neighbor queries for LanceDB.
### Properties
- [\_embeddings](Query.md#_embeddings)
- [\_fastSearch](Query.md#_fastsearch)
- [\_filter](Query.md#_filter)
- [\_limit](Query.md#_limit)
- [\_metricType](Query.md#_metrictype)
@@ -35,7 +34,6 @@ A builder for nearest neighbor queries for LanceDB.
### Methods
- [execute](Query.md#execute)
- [fastSearch](Query.md#fastsearch)
- [filter](Query.md#filter)
- [isElectron](Query.md#iselectron)
- [limit](Query.md#limit)
@@ -67,7 +65,7 @@ A builder for nearest neighbor queries for LanceDB.
#### Defined in
[query.ts:39](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L39)
[query.ts:38](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L38)
## Properties
@@ -77,17 +75,7 @@ A builder for nearest neighbor queries for LanceDB.
#### Defined in
[query.ts:37](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L37)
___
### \_fastSearch
`Private` **\_fastSearch**: `boolean`
#### Defined in
[query.ts:36](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L36)
[query.ts:36](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L36)
___
@@ -97,7 +85,7 @@ ___
#### Defined in
[query.ts:33](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L33)
[query.ts:33](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L33)
___
@@ -107,7 +95,7 @@ ___
#### Defined in
[query.ts:29](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L29)
[query.ts:29](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L29)
___
@@ -117,7 +105,7 @@ ___
#### Defined in
[query.ts:34](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L34)
[query.ts:34](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L34)
___
@@ -127,7 +115,7 @@ ___
#### Defined in
[query.ts:31](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L31)
[query.ts:31](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L31)
___
@@ -137,7 +125,7 @@ ___
#### Defined in
[query.ts:35](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L35)
[query.ts:35](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L35)
___
@@ -147,7 +135,7 @@ ___
#### Defined in
[query.ts:26](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L26)
[query.ts:26](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L26)
___
@@ -157,7 +145,7 @@ ___
#### Defined in
[query.ts:28](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L28)
[query.ts:28](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L28)
___
@@ -167,7 +155,7 @@ ___
#### Defined in
[query.ts:30](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L30)
[query.ts:30](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L30)
___
@@ -177,7 +165,7 @@ ___
#### Defined in
[query.ts:32](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L32)
[query.ts:32](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L32)
___
@@ -187,7 +175,7 @@ ___
#### Defined in
[query.ts:27](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L27)
[query.ts:27](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L27)
___
@@ -213,7 +201,7 @@ A filter statement to be applied to this query.
#### Defined in
[query.ts:90](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L90)
[query.ts:87](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L87)
## Methods
@@ -235,30 +223,7 @@ Execute the query and return the results as an Array of Objects
#### Defined in
[query.ts:127](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L127)
___
### fastSearch
**fastSearch**(`value`): [`Query`](Query.md)\<`T`\>
Skip searching un-indexed data. This can make search faster, but will miss
any data that is not yet indexed.
#### Parameters
| Name | Type |
| :------ | :------ |
| `value` | `boolean` |
#### Returns
[`Query`](Query.md)\<`T`\>
#### Defined in
[query.ts:119](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L119)
[query.ts:115](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L115)
___
@@ -280,7 +245,7 @@ A filter statement to be applied to this query.
#### Defined in
[query.ts:85](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L85)
[query.ts:82](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L82)
___
@@ -294,7 +259,7 @@ ___
#### Defined in
[query.ts:155](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L155)
[query.ts:142](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L142)
___
@@ -303,7 +268,6 @@ ___
**limit**(`value`): [`Query`](Query.md)\<`T`\>
Sets the number of results that will be returned
default value is 10
#### Parameters
@@ -317,7 +281,7 @@ default value is 10
#### Defined in
[query.ts:58](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L58)
[query.ts:55](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L55)
___
@@ -343,7 +307,7 @@ MetricType for the different options
#### Defined in
[query.ts:105](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L105)
[query.ts:102](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L102)
___
@@ -365,7 +329,7 @@ The number of probes used. A higher number makes search more accurate but also s
#### Defined in
[query.ts:76](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L76)
[query.ts:73](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L73)
___
@@ -385,7 +349,7 @@ ___
#### Defined in
[query.ts:110](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L110)
[query.ts:107](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L107)
___
@@ -407,7 +371,7 @@ Refine the results by reading extra elements and re-ranking them in memory.
#### Defined in
[query.ts:67](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L67)
[query.ts:64](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L64)
___
@@ -429,4 +393,4 @@ Return only the specified columns.
#### Defined in
[query.ts:96](https://github.com/lancedb/lancedb/blob/92179835/node/src/query.ts#L96)
[query.ts:93](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/query.ts#L93)

View File

@@ -1,52 +0,0 @@
[vectordb](../README.md) / [Exports](../modules.md) / IndexStatus
# Enumeration: IndexStatus
## Table of contents
### Enumeration Members
- [Done](IndexStatus.md#done)
- [Failed](IndexStatus.md#failed)
- [Indexing](IndexStatus.md#indexing)
- [Pending](IndexStatus.md#pending)
## Enumeration Members
### Done
**Done** = ``"done"``
#### Defined in
[index.ts:713](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L713)
___
### Failed
• **Failed** = ``"failed"``
#### Defined in
[index.ts:714](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L714)
___
### Indexing
• **Indexing** = ``"indexing"``
#### Defined in
[index.ts:712](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L712)
___
### Pending
• **Pending** = ``"pending"``
#### Defined in
[index.ts:711](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L711)

View File

@@ -22,7 +22,7 @@ Cosine distance
#### Defined in
[index.ts:1381](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1381)
[index.ts:1041](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1041)
___
@@ -34,7 +34,7 @@ Dot product
#### Defined in
[index.ts:1386](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1386)
[index.ts:1046](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1046)
___
@@ -46,4 +46,4 @@ Euclidean distance
#### Defined in
[index.ts:1376](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1376)
[index.ts:1036](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1036)

View File

@@ -22,7 +22,7 @@ Append new data to the table.
#### Defined in
[index.ts:1347](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1347)
[index.ts:1007](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1007)
___
@@ -34,7 +34,7 @@ Create a new [Table](../interfaces/Table.md).
#### Defined in
[index.ts:1343](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1343)
[index.ts:1003](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1003)
___
@@ -46,4 +46,4 @@ Overwrite the existing [Table](../interfaces/Table.md) if present.
#### Defined in
[index.ts:1345](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1345)
[index.ts:1005](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1005)

View File

@@ -18,7 +18,7 @@
#### Defined in
[index.ts:68](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L68)
[index.ts:54](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L54)
___
@@ -28,7 +28,7 @@ ___
#### Defined in
[index.ts:70](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L70)
[index.ts:56](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L56)
___
@@ -38,4 +38,4 @@ ___
#### Defined in
[index.ts:72](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L72)
[index.ts:58](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L58)

View File

@@ -19,7 +19,7 @@ The number of bytes removed from disk.
#### Defined in
[index.ts:1218](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1218)
[index.ts:878](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L878)
___
@@ -31,4 +31,4 @@ The number of old table versions removed.
#### Defined in
[index.ts:1222](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1222)
[index.ts:882](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L882)

View File

@@ -1,53 +0,0 @@
[vectordb](../README.md) / [Exports](../modules.md) / ColumnAlteration
# Interface: ColumnAlteration
A definition of a column alteration. The alteration changes the column at
`path` to have the new name `rename`, to be nullable if `nullable` is true,
and to have the data type `data_type`. At least one of `rename` or `nullable`
must be provided.
## Table of contents
### Properties
- [nullable](ColumnAlteration.md#nullable)
- [path](ColumnAlteration.md#path)
- [rename](ColumnAlteration.md#rename)
## Properties
### nullable
`Optional` **nullable**: `boolean`
Set the new nullability. Note that a nullable column cannot be made non-nullable.
#### Defined in
[index.ts:638](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L638)
___
### path
**path**: `string`
The path to the column to alter. This is a dot-separated path to the column.
If it is a top-level column then it is just the name of the column. If it is
a nested column then it is the path to the column, e.g. "a.b.c" for a column
`c` nested inside a column `b` nested inside a column `a`.
#### Defined in
[index.ts:633](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L633)
___
### rename
`Optional` **rename**: `string`
#### Defined in
[index.ts:634](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L634)

View File

@@ -22,7 +22,7 @@ fragments added.
#### Defined in
[index.ts:1273](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1273)
[index.ts:933](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L933)
___
@@ -35,7 +35,7 @@ file.
#### Defined in
[index.ts:1268](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1268)
[index.ts:928](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L928)
___
@@ -47,7 +47,7 @@ The number of new fragments that were created.
#### Defined in
[index.ts:1263](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1263)
[index.ts:923](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L923)
___
@@ -59,4 +59,4 @@ The number of fragments that were removed.
#### Defined in
[index.ts:1259](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1259)
[index.ts:919](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L919)

View File

@@ -24,7 +24,7 @@ Default is true.
#### Defined in
[index.ts:1241](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1241)
[index.ts:901](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L901)
___
@@ -38,7 +38,7 @@ the deleted rows. Default is 10%.
#### Defined in
[index.ts:1247](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1247)
[index.ts:907](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L907)
___
@@ -46,11 +46,11 @@ ___
`Optional` **maxRowsPerGroup**: `number`
The maximum number of T per group. Defaults to 1024.
The maximum number of rows per group. Defaults to 1024.
#### Defined in
[index.ts:1235](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1235)
[index.ts:895](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L895)
___
@@ -63,7 +63,7 @@ the number of cores on the machine.
#### Defined in
[index.ts:1252](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1252)
[index.ts:912](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L912)
___
@@ -77,4 +77,4 @@ Defaults to 1024 * 1024.
#### Defined in
[index.ts:1231](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1231)
[index.ts:891](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L891)

View File

@@ -22,7 +22,6 @@ Connection could be local against filesystem or remote against a server.
- [dropTable](Connection.md#droptable)
- [openTable](Connection.md#opentable)
- [tableNames](Connection.md#tablenames)
- [withMiddleware](Connection.md#withmiddleware)
## Properties
@@ -32,7 +31,7 @@ Connection could be local against filesystem or remote against a server.
#### Defined in
[index.ts:261](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L261)
[index.ts:183](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L183)
## Methods
@@ -60,7 +59,7 @@ Creates a new Table, optionally initializing it with new data.
#### Defined in
[index.ts:285](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L285)
[index.ts:207](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L207)
**createTable**(`name`, `data`): `Promise`\<[`Table`](Table.md)\<`number`[]\>\>
@@ -71,7 +70,7 @@ Creates a new Table and initialize it with new data.
| Name | Type | Description |
| :------ | :------ | :------ |
| `name` | `string` | The name of the table. |
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
| `data` | `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
#### Returns
@@ -79,7 +78,7 @@ Creates a new Table and initialize it with new data.
#### Defined in
[index.ts:299](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L299)
[index.ts:221](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L221)
**createTable**(`name`, `data`, `options`): `Promise`\<[`Table`](Table.md)\<`number`[]\>\>
@@ -90,7 +89,7 @@ Creates a new Table and initialize it with new data.
| Name | Type | Description |
| :------ | :------ | :------ |
| `name` | `string` | The name of the table. |
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
| `data` | `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
| `options` | [`WriteOptions`](WriteOptions.md) | The write options to use when creating the table. |
#### Returns
@@ -99,7 +98,7 @@ Creates a new Table and initialize it with new data.
#### Defined in
[index.ts:311](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L311)
[index.ts:233](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L233)
**createTable**\<`T`\>(`name`, `data`, `embeddings`): `Promise`\<[`Table`](Table.md)\<`T`\>\>
@@ -116,7 +115,7 @@ Creates a new Table and initialize it with new data.
| Name | Type | Description |
| :------ | :------ | :------ |
| `name` | `string` | The name of the table. |
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
| `data` | `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
| `embeddings` | [`EmbeddingFunction`](EmbeddingFunction.md)\<`T`\> | An embedding function to use on this table |
#### Returns
@@ -125,7 +124,7 @@ Creates a new Table and initialize it with new data.
#### Defined in
[index.ts:324](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L324)
[index.ts:246](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L246)
**createTable**\<`T`\>(`name`, `data`, `embeddings`, `options`): `Promise`\<[`Table`](Table.md)\<`T`\>\>
@@ -142,7 +141,7 @@ Creates a new Table and initialize it with new data.
| Name | Type | Description |
| :------ | :------ | :------ |
| `name` | `string` | The name of the table. |
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
| `data` | `Record`\<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the table |
| `embeddings` | [`EmbeddingFunction`](EmbeddingFunction.md)\<`T`\> | An embedding function to use on this table |
| `options` | [`WriteOptions`](WriteOptions.md) | The write options to use when creating the table. |
@@ -152,7 +151,7 @@ Creates a new Table and initialize it with new data.
#### Defined in
[index.ts:337](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L337)
[index.ts:259](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L259)
___
@@ -174,7 +173,7 @@ Drop an existing table.
#### Defined in
[index.ts:348](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L348)
[index.ts:270](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L270)
___
@@ -203,7 +202,7 @@ Open a table in the database.
#### Defined in
[index.ts:271](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L271)
[index.ts:193](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L193)
___
@@ -217,32 +216,4 @@ ___
#### Defined in
[index.ts:263](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L263)
___
### withMiddleware
**withMiddleware**(`middleware`): [`Connection`](Connection.md)
Instrument the behavior of this Connection with middleware.
The middleware will be called in the order they are added.
Currently this functionality is only supported for remote Connections.
#### Parameters
| Name | Type |
| :------ | :------ |
| `middleware` | `HttpMiddleware` |
#### Returns
[`Connection`](Connection.md)
- this Connection instrumented by the passed middleware
#### Defined in
[index.ts:360](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L360)
[index.ts:185](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L185)

View File

@@ -10,10 +10,7 @@
- [awsCredentials](ConnectionOptions.md#awscredentials)
- [awsRegion](ConnectionOptions.md#awsregion)
- [hostOverride](ConnectionOptions.md#hostoverride)
- [readConsistencyInterval](ConnectionOptions.md#readconsistencyinterval)
- [region](ConnectionOptions.md#region)
- [storageOptions](ConnectionOptions.md#storageoptions)
- [timeout](ConnectionOptions.md#timeout)
- [uri](ConnectionOptions.md#uri)
## Properties
@@ -22,13 +19,9 @@
`Optional` **apiKey**: `string`
API key for the remote connections
Can also be passed by setting environment variable `LANCEDB_API_KEY`
#### Defined in
[index.ts:112](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L112)
[index.ts:81](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L81)
___
@@ -40,14 +33,9 @@ User provided AWS credentials.
If not provided, LanceDB will use the default credentials provider chain.
**`Deprecated`**
Pass `aws_access_key_id`, `aws_secret_access_key`, and `aws_session_token`
through `storageOptions` instead.
#### Defined in
[index.ts:92](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L92)
[index.ts:75](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L75)
___
@@ -55,15 +43,11 @@ ___
`Optional` **awsRegion**: `string`
AWS region to connect to. Default is defaultAwsRegion
**`Deprecated`**
Pass `region` through `storageOptions` instead.
AWS region to connect to. Default is defaultAwsRegion.
#### Defined in
[index.ts:98](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L98)
[index.ts:78](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L78)
___
@@ -71,33 +55,13 @@ ___
`Optional` **hostOverride**: `string`
Override the host URL for the remote connection.
Override the host URL for the remote connections.
This is useful for local testing.
#### Defined in
[index.ts:122](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L122)
___
### readConsistencyInterval
`Optional` **readConsistencyInterval**: `number`
(For LanceDB OSS only): The interval, in seconds, at which to check for
updates to the table from other processes. If None, then consistency is not
checked. For performance reasons, this is the default. For strong
consistency, set this to zero seconds. Then every read will check for
updates from other processes. As a compromise, you can set this to a
non-zero value for eventual consistency. If more than that interval
has passed since the last check, then the table will be checked for updates.
Note: this consistency only applies to read operations. Write operations are
always consistent.
#### Defined in
[index.ts:140](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L140)
[index.ts:91](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L91)
___
@@ -105,37 +69,11 @@ ___
`Optional` **region**: `string`
Region to connect. Default is 'us-east-1'
Region to connect
#### Defined in
[index.ts:115](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L115)
___
### storageOptions
`Optional` **storageOptions**: `Record`\<`string`, `string`\>
User provided options for object storage. For example, S3 credentials or request timeouts.
The various options are described at https://lancedb.github.io/lancedb/guides/storage/
#### Defined in
[index.ts:105](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L105)
___
### timeout
`Optional` **timeout**: `number`
Duration in milliseconds for request timeout. Default = 10,000 (10 seconds)
#### Defined in
[index.ts:127](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L127)
[index.ts:84](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L84)
___
@@ -147,8 +85,8 @@ LanceDB database URI.
- `/path/to/database` - local database
- `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
- `db://host:port` - remote database (LanceDB cloud)
- `db://host:port` - remote database (SaaS)
#### Defined in
[index.ts:83](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L83)
[index.ts:69](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L69)

View File

@@ -26,7 +26,7 @@
#### Defined in
[index.ts:163](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L163)
[index.ts:116](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L116)
___
@@ -36,7 +36,7 @@ ___
#### Defined in
[index.ts:169](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L169)
[index.ts:122](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L122)
___
@@ -46,7 +46,7 @@ ___
#### Defined in
[index.ts:160](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L160)
[index.ts:113](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L113)
___
@@ -56,7 +56,7 @@ ___
#### Defined in
[index.ts:166](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L166)
[index.ts:119](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L119)
___
@@ -66,4 +66,4 @@ ___
#### Defined in
[index.ts:172](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L172)
[index.ts:125](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L125)

View File

@@ -18,29 +18,11 @@ An embedding function that automatically creates vector representation for a giv
### Properties
- [destColumn](EmbeddingFunction.md#destcolumn)
- [embed](EmbeddingFunction.md#embed)
- [embeddingDataType](EmbeddingFunction.md#embeddingdatatype)
- [embeddingDimension](EmbeddingFunction.md#embeddingdimension)
- [excludeSource](EmbeddingFunction.md#excludesource)
- [sourceColumn](EmbeddingFunction.md#sourcecolumn)
## Properties
### destColumn
`Optional` **destColumn**: `string`
The name of the column that will contain the embedding
By default this is "vector"
#### Defined in
[embedding/embedding_function.ts:49](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/embedding_function.ts#L49)
___
### embed
**embed**: (`data`: `T`[]) => `Promise`\<`number`[][]\>
@@ -63,54 +45,7 @@ Creates a vector representation for the given values.
#### Defined in
[embedding/embedding_function.ts:62](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/embedding_function.ts#L62)
___
### embeddingDataType
`Optional` **embeddingDataType**: `Float`\<`Floats`\>
The data type of the embedding
The embedding function should return `number`. This will be converted into
an Arrow float array. By default this will be Float32 but this property can
be used to control the conversion.
#### Defined in
[embedding/embedding_function.ts:33](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/embedding_function.ts#L33)
___
### embeddingDimension
`Optional` **embeddingDimension**: `number`
The dimension of the embedding
This is optional, normally this can be determined by looking at the results of
`embed`. If this is not specified, and there is an attempt to apply the embedding
to an empty table, then that process will fail.
#### Defined in
[embedding/embedding_function.ts:42](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/embedding_function.ts#L42)
___
### excludeSource
`Optional` **excludeSource**: `boolean`
Should the source column be excluded from the resulting table
By default the source column is included. Set this to true and
only the embedding will be stored.
#### Defined in
[embedding/embedding_function.ts:57](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/embedding_function.ts#L57)
[embedding/embedding_function.ts:27](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/embedding_function.ts#L27)
___
@@ -122,4 +57,4 @@ The name of the column that will be used as input for the Embedding Function.
#### Defined in
[embedding/embedding_function.ts:24](https://github.com/lancedb/lancedb/blob/92179835/node/src/embedding/embedding_function.ts#L24)
[embedding/embedding_function.ts:22](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/embedding/embedding_function.ts#L22)

View File

@@ -6,51 +6,18 @@
### Properties
- [distanceType](IndexStats.md#distancetype)
- [indexType](IndexStats.md#indextype)
- [numIndexedRows](IndexStats.md#numindexedrows)
- [numIndices](IndexStats.md#numindices)
- [numUnindexedRows](IndexStats.md#numunindexedrows)
## Properties
### distanceType
`Optional` **distanceType**: `string`
#### Defined in
[index.ts:728](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L728)
___
### indexType
**indexType**: `string`
#### Defined in
[index.ts:727](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L727)
___
### numIndexedRows
**numIndexedRows**: ``null`` \| `number`
#### Defined in
[index.ts:725](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L725)
___
### numIndices
• `Optional` **numIndices**: `number`
#### Defined in
[index.ts:729](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L729)
[index.ts:478](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L478)
___
@@ -60,4 +27,4 @@ ___
#### Defined in
[index.ts:726](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L726)
[index.ts:479](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L479)

View File

@@ -29,7 +29,7 @@ The column to be indexed
#### Defined in
[index.ts:1282](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1282)
[index.ts:942](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L942)
___
@@ -41,7 +41,7 @@ Cache size of the index
#### Defined in
[index.ts:1331](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1331)
[index.ts:991](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L991)
___
@@ -53,7 +53,7 @@ A unique name for the index
#### Defined in
[index.ts:1287](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1287)
[index.ts:947](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L947)
___
@@ -65,7 +65,7 @@ The max number of iterations for kmeans training.
#### Defined in
[index.ts:1302](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1302)
[index.ts:962](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L962)
___
@@ -77,7 +77,7 @@ Max number of iterations to train OPQ, if `use_opq` is true.
#### Defined in
[index.ts:1321](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1321)
[index.ts:981](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L981)
___
@@ -89,7 +89,7 @@ Metric type, L2 or Cosine
#### Defined in
[index.ts:1292](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1292)
[index.ts:952](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L952)
___
@@ -101,7 +101,7 @@ The number of bits to represent one PQ centroid.
#### Defined in
[index.ts:1316](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1316)
[index.ts:976](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L976)
___
@@ -113,7 +113,7 @@ The number of partitions this index
#### Defined in
[index.ts:1297](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1297)
[index.ts:957](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L957)
___
@@ -125,7 +125,7 @@ Number of subvectors to build PQ code
#### Defined in
[index.ts:1312](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1312)
[index.ts:972](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L972)
___
@@ -137,7 +137,7 @@ Replace an existing index with the same name if it exists.
#### Defined in
[index.ts:1326](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1326)
[index.ts:986](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L986)
___
@@ -147,7 +147,7 @@ ___
#### Defined in
[index.ts:1333](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1333)
[index.ts:993](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L993)
___
@@ -159,4 +159,4 @@ Train as optimized product quantization.
#### Defined in
[index.ts:1307](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1307)
[index.ts:967](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L967)

View File

@@ -1,73 +0,0 @@
[vectordb](../README.md) / [Exports](../modules.md) / MergeInsertArgs
# Interface: MergeInsertArgs
## Table of contents
### Properties
- [whenMatchedUpdateAll](MergeInsertArgs.md#whenmatchedupdateall)
- [whenNotMatchedBySourceDelete](MergeInsertArgs.md#whennotmatchedbysourcedelete)
- [whenNotMatchedInsertAll](MergeInsertArgs.md#whennotmatchedinsertall)
## Properties
### whenMatchedUpdateAll
`Optional` **whenMatchedUpdateAll**: `string` \| `boolean`
If true then rows that exist in both the source table (new data) and
the target table (old data) will be updated, replacing the old row
with the corresponding matching row.
If there are multiple matches then the behavior is undefined.
Currently this causes multiple copies of the row to be created
but that behavior is subject to change.
Optionally, a filter can be specified. This should be an SQL
filter where fields with the prefix "target." refer to fields
in the target table (old data) and fields with the prefix
"source." refer to fields in the source table (new data). For
example, the filter "target.lastUpdated < source.lastUpdated" will
only update matched rows when the incoming `lastUpdated` value is
newer.
Rows that do not match the filter will not be updated. Rows that
do not match the filter do become "not matched" rows.
#### Defined in
[index.ts:690](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L690)
___
### whenNotMatchedBySourceDelete
`Optional` **whenNotMatchedBySourceDelete**: `string` \| `boolean`
If true then rows that exist only in the target table (old data)
will be deleted.
If this is a string then it will be treated as an SQL filter and
only rows that both do not match any row in the source table and
match the given filter will be deleted.
This can be used to replace a selection of existing data with
new data.
#### Defined in
[index.ts:707](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L707)
___
### whenNotMatchedInsertAll
`Optional` **whenNotMatchedInsertAll**: `boolean`
If true then rows that exist only in the source table (new data)
will be inserted into the target table.
#### Defined in
[index.ts:695](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L695)

View File

@@ -25,26 +25,17 @@ A LanceDB Table is the collection of Records. Each Record has one or more vector
- [delete](Table.md#delete)
- [indexStats](Table.md#indexstats)
- [listIndices](Table.md#listindices)
- [mergeInsert](Table.md#mergeinsert)
- [name](Table.md#name)
- [overwrite](Table.md#overwrite)
- [schema](Table.md#schema)
- [search](Table.md#search)
- [update](Table.md#update)
### Methods
- [addColumns](Table.md#addcolumns)
- [alterColumns](Table.md#altercolumns)
- [dropColumns](Table.md#dropcolumns)
- [filter](Table.md#filter)
- [withMiddleware](Table.md#withmiddleware)
## Properties
### add
**add**: (`data`: `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[]) => `Promise`\<`number`\>
**add**: (`data`: `Record`\<`string`, `unknown`\>[]) => `Promise`\<`number`\>
#### Type declaration
@@ -56,7 +47,7 @@ Insert records into this Table.
| Name | Type | Description |
| :------ | :------ | :------ |
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
| `data` | `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
##### Returns
@@ -66,33 +57,27 @@ The number of rows added to the table
#### Defined in
[index.ts:381](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L381)
[index.ts:291](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L291)
___
### countRows
**countRows**: (`filter?`: `string`) => `Promise`\<`number`\>
**countRows**: () => `Promise`\<`number`\>
#### Type declaration
▸ (`filter?`): `Promise`\<`number`\>
▸ (): `Promise`\<`number`\>
Returns the number of rows in this table.
##### Parameters
| Name | Type |
| :------ | :------ |
| `filter?` | `string` |
##### Returns
`Promise`\<`number`\>
#### Defined in
[index.ts:454](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L454)
[index.ts:361](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L361)
___
@@ -122,17 +107,17 @@ VectorIndexParams.
#### Defined in
[index.ts:398](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L398)
[index.ts:306](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L306)
___
### createScalarIndex
**createScalarIndex**: (`column`: `string`, `replace?`: `boolean`) => `Promise`\<`void`\>
**createScalarIndex**: (`column`: `string`, `replace`: `boolean`) => `Promise`\<`void`\>
#### Type declaration
▸ (`column`, `replace?`): `Promise`\<`void`\>
▸ (`column`, `replace`): `Promise`\<`void`\>
Create a scalar index on this Table for the given column
@@ -141,7 +126,7 @@ Create a scalar index on this Table for the given column
| Name | Type | Description |
| :------ | :------ | :------ |
| `column` | `string` | The column to index |
| `replace?` | `boolean` | If false, fail if an index already exists on the column. It is always set to true for remote connections. Scalar indices, like vector indices, can be used to speed up scans. A scalar index can speed up scans that contain filter expressions on the indexed column. For example, the following scan will be faster if the column `my_col` has a scalar index: ```ts const con = await lancedb.connect('./.lancedb'); const table = await con.openTable('images'); const results = await table.where('my_col = 7').execute(); ``` Scalar indices can also speed up scans containing a vector search and a prefilter: ```ts const con = await lancedb.connect('././lancedb'); const table = await con.openTable('images'); const results = await table.search([1.0, 2.0]).where('my_col != 7').prefilter(true); ``` Scalar indices can only speed up scans for basic filters using equality, comparison, range (e.g. `my_col BETWEEN 0 AND 100`), and set membership (e.g. `my_col IN (0, 1, 2)`). Scalar indices can be used if the filter contains multiple indexed columns and the filter criteria are AND'd or OR'd together (e.g. `my_col < 0 AND other_col > 100`). Scalar indices may be used if the filter contains non-indexed columns but, depending on the structure of the filter, they may not be usable. For example, if the column `not_indexed` does not have a scalar index then the filter `my_col = 0 OR not_indexed = 1` will not be able to use any scalar index on `my_col`. |
| `replace` | `boolean` | If false, fail if an index already exists on the column. Scalar indices, like vector indices, can be used to speed up scans. A scalar index can speed up scans that contain filter expressions on the indexed column. For example, the following scan will be faster if the column `my_col` has a scalar index: ```ts const con = await lancedb.connect('./.lancedb'); const table = await con.openTable('images'); const results = await table.where('my_col = 7').execute(); ``` Scalar indices can also speed up scans containing a vector search and a prefilter: ```ts const con = await lancedb.connect('././lancedb'); const table = await con.openTable('images'); const results = await table.search([1.0, 2.0]).where('my_col != 7').prefilter(true); ``` Scalar indices can only speed up scans for basic filters using equality, comparison, range (e.g. `my_col BETWEEN 0 AND 100`), and set membership (e.g. `my_col IN (0, 1, 2)`). Scalar indices can be used if the filter contains multiple indexed columns and the filter criteria are AND'd or OR'd together (e.g. `my_col < 0 AND other_col > 100`). Scalar indices may be used if the filter contains non-indexed columns but, depending on the structure of the filter, they may not be usable. For example, if the column `not_indexed` does not have a scalar index then the filter `my_col = 0 OR not_indexed = 1` will not be able to use any scalar index on `my_col`. |
##### Returns
@@ -157,7 +142,7 @@ await table.createScalarIndex('my_col')
#### Defined in
[index.ts:449](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L449)
[index.ts:356](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L356)
___
@@ -209,17 +194,17 @@ await tbl.countRows() // Returns 1
#### Defined in
[index.ts:488](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L488)
[index.ts:395](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L395)
___
### indexStats
• **indexStats**: (`indexName`: `string`) => `Promise`\<[`IndexStats`](IndexStats.md)\>
• **indexStats**: (`indexUuid`: `string`) => `Promise`\<[`IndexStats`](IndexStats.md)\>
#### Type declaration
▸ (`indexName`): `Promise`\<[`IndexStats`](IndexStats.md)\>
▸ (`indexUuid`): `Promise`\<[`IndexStats`](IndexStats.md)\>
Get statistics about an index.
@@ -227,7 +212,7 @@ Get statistics about an index.
| Name | Type |
| :------ | :------ |
| `indexName` | `string` |
| `indexUuid` | `string` |
##### Returns
@@ -235,7 +220,7 @@ Get statistics about an index.
#### Defined in
[index.ts:567](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L567)
[index.ts:438](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L438)
___
@@ -255,57 +240,7 @@ List the indices on this table.
#### Defined in
[index.ts:562](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L562)
___
### mergeInsert
• **mergeInsert**: (`on`: `string`, `data`: `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[], `args`: [`MergeInsertArgs`](MergeInsertArgs.md)) => `Promise`\<`void`\>
#### Type declaration
▸ (`on`, `data`, `args`): `Promise`\<`void`\>
Runs a "merge insert" operation on the table
This operation can add rows, update rows, and remove rows all in a single
transaction. It is a very generic tool that can be used to create
behaviors like "insert if not exists", "update or insert (i.e. upsert)",
or even replace a portion of existing data with new data (e.g. replace
all data where month="january")
The merge insert operation works by combining new data from a
**source table** with existing data in a **target table** by using a
join. There are three categories of records.
"Matched" records are records that exist in both the source table and
the target table. "Not matched" records exist only in the source table
(i.e. these are new data). "Not matched by source" records exist only
in the target table (i.e. this is old data).
The MergeInsertArgs can be used to customize what should happen for
each category of data.
Please note that the data may appear to be reordered as part of this
operation. This is because updated rows will be deleted from the
dataset and then reinserted at the end with the new values.
##### Parameters
| Name | Type | Description |
| :------ | :------ | :------ |
| `on` | `string` | a column to join on. This is how records from the source table and target table are matched. |
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | the new data to insert |
| `args` | [`MergeInsertArgs`](MergeInsertArgs.md) | parameters controlling how the operation should behave |
##### Returns
`Promise`\<`void`\>
#### Defined in
[index.ts:553](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L553)
[index.ts:433](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L433)
___
@@ -315,13 +250,13 @@ ___
#### Defined in
[index.ts:367](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L367)
[index.ts:277](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L277)
___
### overwrite
• **overwrite**: (`data`: `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[]) => `Promise`\<`number`\>
• **overwrite**: (`data`: `Record`\<`string`, `unknown`\>[]) => `Promise`\<`number`\>
#### Type declaration
@@ -333,7 +268,7 @@ Insert records into this Table, replacing its contents.
| Name | Type | Description |
| :------ | :------ | :------ |
| `data` | `Table`\<`any`\> \| `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
| `data` | `Record`\<`string`, `unknown`\>[] | Records to be inserted into the Table |
##### Returns
@@ -343,7 +278,7 @@ The number of rows added to the table
#### Defined in
[index.ts:389](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L389)
[index.ts:299](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L299)
___
@@ -353,7 +288,7 @@ ___
#### Defined in
[index.ts:571](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L571)
[index.ts:440](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L440)
___
@@ -379,7 +314,7 @@ Creates a search query to find the nearest neighbors of the given search term
#### Defined in
[index.ts:373](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L373)
[index.ts:283](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L283)
___
@@ -430,123 +365,4 @@ let results = await tbl.search([1, 1]).execute();
#### Defined in
[index.ts:521](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L521)
## Methods
### addColumns
▸ **addColumns**(`newColumnTransforms`): `Promise`\<`void`\>
Add new columns with defined values.
#### Parameters
| Name | Type | Description |
| :------ | :------ | :------ |
| `newColumnTransforms` | \{ `name`: `string` ; `valueSql`: `string` }[] | pairs of column names and the SQL expression to use to calculate the value of the new column. These expressions will be evaluated for each row in the table, and can reference existing columns in the table. |
#### Returns
`Promise`\<`void`\>
#### Defined in
[index.ts:582](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L582)
___
### alterColumns
▸ **alterColumns**(`columnAlterations`): `Promise`\<`void`\>
Alter the name or nullability of columns.
#### Parameters
| Name | Type | Description |
| :------ | :------ | :------ |
| `columnAlterations` | [`ColumnAlteration`](ColumnAlteration.md)[] | One or more alterations to apply to columns. |
#### Returns
`Promise`\<`void`\>
#### Defined in
[index.ts:591](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L591)
___
### dropColumns
▸ **dropColumns**(`columnNames`): `Promise`\<`void`\>
Drop one or more columns from the dataset
This is a metadata-only operation and does not remove the data from the
underlying storage. In order to remove the data, you must subsequently
call ``compact_files`` to rewrite the data without the removed columns and
then call ``cleanup_files`` to remove the old files.
#### Parameters
| Name | Type | Description |
| :------ | :------ | :------ |
| `columnNames` | `string`[] | The names of the columns to drop. These can be nested column references (e.g. "a.b.c") or top-level column names (e.g. "a"). |
#### Returns
`Promise`\<`void`\>
#### Defined in
[index.ts:605](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L605)
___
### filter
▸ **filter**(`value`): [`Query`](../classes/Query.md)\<`T`\>
#### Parameters
| Name | Type |
| :------ | :------ |
| `value` | `string` |
#### Returns
[`Query`](../classes/Query.md)\<`T`\>
#### Defined in
[index.ts:569](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L569)
___
### withMiddleware
▸ **withMiddleware**(`middleware`): [`Table`](Table.md)\<`T`\>
Instrument the behavior of this Table with middleware.
The middleware will be called in the order they are added.
Currently this functionality is only supported for remote tables.
#### Parameters
| Name | Type |
| :------ | :------ |
| `middleware` | `HttpMiddleware` |
#### Returns
[`Table`](Table.md)\<`T`\>
- this Table instrumented by the passed middleware
#### Defined in
[index.ts:617](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L617)
[index.ts:428](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L428)

View File

@@ -20,7 +20,7 @@ new values to set
#### Defined in
[index.ts:652](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L652)
[index.ts:454](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L454)
___
@@ -33,4 +33,4 @@ in which case all rows will be updated.
#### Defined in
[index.ts:646](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L646)
[index.ts:448](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L448)

View File

@@ -20,7 +20,7 @@ new values to set as SQL expressions.
#### Defined in
[index.ts:666](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L666)
[index.ts:468](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L468)
___
@@ -33,4 +33,4 @@ in which case all rows will be updated.
#### Defined in
[index.ts:660](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L660)
[index.ts:462](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L462)

View File

@@ -8,7 +8,6 @@
- [columns](VectorIndex.md#columns)
- [name](VectorIndex.md#name)
- [status](VectorIndex.md#status)
- [uuid](VectorIndex.md#uuid)
## Properties
@@ -19,7 +18,7 @@
#### Defined in
[index.ts:718](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L718)
[index.ts:472](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L472)
___
@@ -29,17 +28,7 @@ ___
#### Defined in
[index.ts:719](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L719)
___
### status
**status**: [`IndexStatus`](../enums/IndexStatus.md)
#### Defined in
[index.ts:721](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L721)
[index.ts:473](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L473)
___
@@ -49,4 +38,4 @@ ___
#### Defined in
[index.ts:720](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L720)
[index.ts:474](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L474)

View File

@@ -24,4 +24,4 @@ A [WriteMode](../enums/WriteMode.md) to use on this operation
#### Defined in
[index.ts:1355](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1355)
[index.ts:1015](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1015)

View File

@@ -6,7 +6,6 @@
### Enumerations
- [IndexStatus](enums/IndexStatus.md)
- [MetricType](enums/MetricType.md)
- [WriteMode](enums/WriteMode.md)
@@ -15,7 +14,6 @@
- [DefaultWriteOptions](classes/DefaultWriteOptions.md)
- [LocalConnection](classes/LocalConnection.md)
- [LocalTable](classes/LocalTable.md)
- [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
- [OpenAIEmbeddingFunction](classes/OpenAIEmbeddingFunction.md)
- [Query](classes/Query.md)
@@ -23,7 +21,6 @@
- [AwsCredentials](interfaces/AwsCredentials.md)
- [CleanupStats](interfaces/CleanupStats.md)
- [ColumnAlteration](interfaces/ColumnAlteration.md)
- [CompactionMetrics](interfaces/CompactionMetrics.md)
- [CompactionOptions](interfaces/CompactionOptions.md)
- [Connection](interfaces/Connection.md)
@@ -32,7 +29,6 @@
- [EmbeddingFunction](interfaces/EmbeddingFunction.md)
- [IndexStats](interfaces/IndexStats.md)
- [IvfPQIndexConfig](interfaces/IvfPQIndexConfig.md)
- [MergeInsertArgs](interfaces/MergeInsertArgs.md)
- [Table](interfaces/Table.md)
- [UpdateArgs](interfaces/UpdateArgs.md)
- [UpdateSqlArgs](interfaces/UpdateSqlArgs.md)
@@ -46,9 +42,7 @@
### Functions
- [connect](modules.md#connect)
- [convertToTable](modules.md#converttotable)
- [isWriteOptions](modules.md#iswriteoptions)
- [makeArrowTable](modules.md#makearrowtable)
## Type Aliases
@@ -58,7 +52,7 @@
#### Defined in
[index.ts:1336](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1336)
[index.ts:996](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L996)
## Functions
@@ -68,11 +62,11 @@
Connect to a LanceDB instance at the given URI.
Accepted formats:
Accepted formats:
- `/path/to/database` - local database
- `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
- `db://host:port` - remote database (LanceDB cloud)
- `db://host:port` - remote database (SaaS)
#### Parameters
@@ -90,7 +84,7 @@ Accepted formats:
#### Defined in
[index.ts:188](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L188)
[index.ts:141](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L141)
**connect**(`opts`): `Promise`\<[`Connection`](interfaces/Connection.md)\>
@@ -108,35 +102,7 @@ Connect to a LanceDB instance with connection options.
#### Defined in
[index.ts:194](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L194)
___
### convertToTable
**convertToTable**\<`T`\>(`data`, `embeddings?`, `makeTableOptions?`): `Promise`\<`ArrowTable`\>
#### Type parameters
| Name |
| :------ |
| `T` |
#### Parameters
| Name | Type |
| :------ | :------ |
| `data` | `Record`\<`string`, `unknown`\>[] |
| `embeddings?` | [`EmbeddingFunction`](interfaces/EmbeddingFunction.md)\<`T`\> |
| `makeTableOptions?` | `Partial`\<[`MakeArrowTableOptions`](classes/MakeArrowTableOptions.md)\> |
#### Returns
`Promise`\<`ArrowTable`\>
#### Defined in
[arrow.ts:465](https://github.com/lancedb/lancedb/blob/92179835/node/src/arrow.ts#L465)
[index.ts:147](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L147)
___
@@ -156,116 +122,4 @@ value is WriteOptions
#### Defined in
[index.ts:1362](https://github.com/lancedb/lancedb/blob/92179835/node/src/index.ts#L1362)
___
### makeArrowTable
**makeArrowTable**(`data`, `options?`): `ArrowTable`
An enhanced version of the makeTable function from Apache Arrow
that supports nested fields and embeddings columns.
This function converts an array of Record<String, any> (row-major JS objects)
to an Arrow Table (a columnar structure)
Note that it currently does not support nulls.
If a schema is provided then it will be used to determine the resulting array
types. Fields will also be reordered to fit the order defined by the schema.
If a schema is not provided then the types will be inferred and the field order
will be controlled by the order of properties in the first record.
If the input is empty then a schema must be provided to create an empty table.
When a schema is not specified then data types will be inferred. The inference
rules are as follows:
- boolean => Bool
- number => Float64
- String => Utf8
- Buffer => Binary
- Record<String, any> => Struct
- Array<any> => List
#### Parameters
| Name | Type | Description |
| :------ | :------ | :------ |
| `data` | `Record`\<`string`, `any`\>[] | input data |
| `options?` | `Partial`\<[`MakeArrowTableOptions`](classes/MakeArrowTableOptions.md)\> | options to control the makeArrowTable call. |
#### Returns
`ArrowTable`
**`Example`**
```ts
import { fromTableToBuffer, makeArrowTable } from "../arrow";
import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
const schema = new Schema([
new Field("a", new Int32()),
new Field("b", new Float32()),
new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
]);
const table = makeArrowTable([
{ a: 1, b: 2, c: [1, 2, 3] },
{ a: 4, b: 5, c: [4, 5, 6] },
{ a: 7, b: 8, c: [7, 8, 9] },
], { schema });
```
By default it assumes that the column named `vector` is a vector column
and it will be converted into a fixed size list array of type float32.
The `vectorColumns` option can be used to support other vector column
names and data types.
```ts
const schema = new Schema([
new Field("a", new Float64()),
new Field("b", new Float64()),
new Field(
"vector",
new FixedSizeList(3, new Field("item", new Float32()))
),
]);
const table = makeArrowTable([
{ a: 1, b: 2, vector: [1, 2, 3] },
{ a: 4, b: 5, vector: [4, 5, 6] },
{ a: 7, b: 8, vector: [7, 8, 9] },
]);
assert.deepEqual(table.schema, schema);
```
You can specify the vector column types and names using the options as well
```typescript
const schema = new Schema([
new Field('a', new Float64()),
new Field('b', new Float64()),
new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
]);
const table = makeArrowTable([
{ a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
{ a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
{ a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
], {
vectorColumns: {
vec1: { type: new Float16() },
vec2: { type: new Float16() }
}
});
assert.deepEqual(table.schema, schema)
```
#### Defined in
[arrow.ts:198](https://github.com/lancedb/lancedb/blob/92179835/node/src/arrow.ts#L198)
[index.ts:1022](https://github.com/lancedb/lancedb/blob/c89d5e6/node/src/index.ts#L1022)

View File

@@ -39,46 +39,4 @@
height: 1.2rem;
margin-top: -.1rem;
}
}
/* remove pilcrow as permanent link and add chain icon similar to github https://github.com/squidfunk/mkdocs-material/discussions/3535 */
/* The icon is not an image element: the box is filled with a solid
   background colour and the chain-link SVG is applied as a mask, so the
   icon's colour is controlled purely through background-color. */
.headerlink {
/* Icon dimensions, exposed as custom properties so other .headerlink rules can reuse them. */
--permalink-size: 16px; /* for font-relative sizes, 0.6em is a good choice */
--permalink-spacing: 4px;
/* The box is one icon wide plus the spacing; the mask itself stays icon-sized and is not repeated. */
width: calc(var(--permalink-size) + var(--permalink-spacing));
height: var(--permalink-size);
vertical-align: middle;
background-color: var(--md-default-fg-color--lighter);
background-size: var(--permalink-size);
mask-size: var(--permalink-size);
-webkit-mask-size: var(--permalink-size);
mask-repeat: no-repeat;
-webkit-mask-repeat: no-repeat;
visibility: visible;
/* Same SVG twice: once for the standard property, once for the -webkit- prefixed one. */
mask-image: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" version="1.1" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg>');
-webkit-mask-image: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" version="1.1" width="16" height="16" aria-hidden="true"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg>');
}
/* When an element carrying an id is the current URL fragment target,
   recolour the permalink icon inside it with the link colour. */
[id]:target .headerlink {
background-color: var(--md-typeset-a-color);
}
/* Hover colour for the permalink icon. !important makes it win over any
   non-important background-color set on .headerlink, whatever the selector's
   specificity. */
.headerlink:hover {
background-color: var(--md-accent-fg-color) !important;
}
/* From 76.25em viewport width upwards: lay headings out as flex rows and move
   the permalink icon in front of the heading text. */
@media screen and (min-width: 76.25em) {
h1, h2, h3, h4, h5, h6 {
display: flex;
align-items: center;
flex-direction: row;
column-gap: 0.2em; /* fixes spaces in titles */
}
.headerlink {
/* order: -1 places the icon before the other flex items of the heading. */
order: -1;
/* Negative margin of (size + spacing): shifts the icon left by that amount. */
margin-left: calc(var(--permalink-size) * -1 - var(--permalink-spacing)) !important;
}
}
}

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.11.1-beta.0</version>
<version>0.11.0-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.11.1-beta.0</version>
<version>0.11.0-beta.1</version>
<packaging>pom</packaging>
<name>LanceDB Parent</name>

1468
node/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.11.1-beta.0",
"version": "0.11.0-beta.1",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -58,7 +58,7 @@
"ts-node-dev": "^2.0.0",
"typedoc": "^0.24.7",
"typedoc-plugin-markdown": "^3.15.3",
"typescript": "^5.1.0",
"typescript": "*",
"uuid": "^9.0.0"
},
"dependencies": {
@@ -88,10 +88,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.11.1-beta.0",
"@lancedb/vectordb-darwin-x64": "0.11.1-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.11.1-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.11.1-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.11.1-beta.0"
"@lancedb/vectordb-darwin-arm64": "0.4.20",
"@lancedb/vectordb-darwin-x64": "0.4.20",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.20",
"@lancedb/vectordb-linux-x64-gnu": "0.4.20",
"@lancedb/vectordb-win32-x64-msvc": "0.4.20"
}
}

View File

@@ -564,7 +564,7 @@ export interface Table<T = number[]> {
/**
* Get statistics about an index.
*/
indexStats: (indexName: string) => Promise<IndexStats>
indexStats: (indexUuid: string) => Promise<IndexStats>
filter(value: string): Query<T>
@@ -1164,8 +1164,8 @@ export class LocalTable<T = number[]> implements Table<T> {
return tableListIndices.call(this._tbl);
}
async indexStats(indexName: string): Promise<IndexStats> {
return tableIndexStats.call(this._tbl, indexName);
async indexStats(indexUuid: string): Promise<IndexStats> {
return tableIndexStats.call(this._tbl, indexUuid);
}
get schema(): Promise<Schema> {

View File

@@ -14,7 +14,6 @@
import { describe } from 'mocha'
import * as chai from 'chai'
import { assert } from 'chai'
import * as chaiAsPromised from 'chai-as-promised'
import { v4 as uuidv4 } from 'uuid'
@@ -23,6 +22,7 @@ import { tmpdir } from 'os'
import * as fs from 'fs'
import * as path from 'path'
const assert = chai.assert
chai.use(chaiAsPromised)
describe('LanceDB AWS Integration test', function () {

View File

@@ -142,9 +142,9 @@ export class Query<T = number[]> {
Object.keys(entry).forEach((key: string) => {
if (entry[key] instanceof Vector) {
// toJSON() returns f16 array correctly
newObject[key] = (entry[key] as any).toJSON()
newObject[key] = (entry[key] as Vector).toJSON()
} else {
newObject[key] = entry[key] as any
newObject[key] = entry[key]
}
})
return newObject as unknown as T

View File

@@ -247,9 +247,9 @@ export class RemoteQuery<T = number[]> extends Query<T> {
const newObject: Record<string, unknown> = {}
Object.keys(entry).forEach((key: string) => {
if (entry[key] instanceof Vector) {
newObject[key] = (entry[key] as any).toArray()
newObject[key] = (entry[key] as Vector).toArray()
} else {
newObject[key] = entry[key] as any
newObject[key] = entry[key]
}
})
return newObject as unknown as T
@@ -517,9 +517,9 @@ export class RemoteTable<T = number[]> implements Table<T> {
}))
}
async indexStats (indexName: string): Promise<IndexStats> {
async indexStats (indexUuid: string): Promise<IndexStats> {
const results = await this._client.post(
`/v1/table/${encodeURIComponent(this._name)}/index/${indexName}/stats/`
`/v1/table/${encodeURIComponent(this._name)}/index/${indexUuid}/stats/`
)
const body = await results.body()
return {

View File

@@ -14,7 +14,6 @@
import { describe } from "mocha";
import { track } from "temp";
import { assert, expect } from 'chai'
import * as chai from "chai";
import * as chaiAsPromised from "chai-as-promised";
@@ -45,6 +44,8 @@ import {
} from "apache-arrow";
import type { RemoteRequest, RemoteResponse } from "../middleware";
const expect = chai.expect;
const assert = chai.assert;
chai.use(chaiAsPromised);
describe("LanceDB client", function () {
@@ -93,7 +94,7 @@ describe("LanceDB client", function () {
assert.deepEqual(await con.tableNames(), ["vectors"]);
});
it("read consistency level", async function () {
it.only("read consistency level", async function () {
const uri = await createTestDB();
const db1 = await lancedb.connect({ uri });
const table1 = await db1.openTable("vectors");
@@ -168,7 +169,7 @@ describe("LanceDB client", function () {
// Should reject a bad filter
await expect(table.filter("id % 2 = 0 AND").execute()).to.be.rejectedWith(
/.*sql parser error: .*/
/.*sql parser error: Expected an expression:, found: EOF.*/
);
});

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.11.1-beta.0"
version = "0.0.0"
license.workspace = true
description.workspace = true
repository.workspace = true
@@ -14,7 +14,7 @@ crate-type = ["cdylib"]
[dependencies]
arrow-ipc.workspace = true
futures.workspace = true
lancedb = { path = "../rust/lancedb", features = ["remote"] }
lancedb = { path = "../rust/lancedb" }
napi = { version = "2.16.8", default-features = false, features = [
"napi9",
"async",

View File

@@ -1,93 +0,0 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import * as http from "http";
import { RequestListener } from "http";
import { Connection, ConnectionOptions, connect } from "../lancedb";
/**
 * Runs `callback` against a connection that is routed to a throwaway local
 * HTTP server.
 *
 * The server is built from `listener`, bound to port 8000, and closed again
 * once `callback` has settled, whether it resolved or threw.
 *
 * @param listener - request handler that stands in for the remote database.
 * @param callback - test body; receives the connection. It may be async, in
 *   which case it is awaited before the server is shut down.
 * @param connectionOptions - extra options merged over the default `apiKey`
 *   and `hostOverride`.
 */
async function withMockDatabase(
  listener: RequestListener,
  callback: (db: Connection) => void | Promise<void>,
  connectionOptions?: ConnectionOptions,
): Promise<void> {
  const server = http.createServer(listener);

  // Wait until the port is actually bound so the first client request cannot
  // race the server start-up; a bind failure rejects instead of going unseen.
  await new Promise<void>((resolve, reject) => {
    server.once("error", reject);
    server.listen(8000, () => resolve());
  });

  try {
    // Connecting happens inside the try block so the server is still closed
    // if the connection attempt itself fails.
    const db = await connect(
      "db://dev",
      Object.assign(
        {
          apiKey: "fake",
          hostOverride: "http://localhost:8000",
        },
        connectionOptions,
      ),
    );
    await callback(db);
  } finally {
    server.close();
  }
}
describe("remote connection", () => {
  // Answers a request with an empty table listing.
  const replyWithNoTables = (res: http.ServerResponse): void => {
    const body = JSON.stringify({ tables: [] });
    res.writeHead(200, { "Content-Type": "application/json" }).end(body);
  };

  // Checks that the connection reports no tables.
  const expectNoTables = async (db: Connection): Promise<void> => {
    const tableNames = await db.tableNames();
    expect(tableNames).toEqual([]);
  };

  it("should accept partial connection options", async () => {
    const partialOptions = {
      apiKey: "fake",
      clientConfig: {
        timeoutConfig: { readTimeout: 5 },
        retryConfig: { retries: 2 },
      },
    };
    await connect("db://test", partialOptions);
  });

  it("should pass down apiKey and userAgent", async () => {
    await withMockDatabase((req, res) => {
      // Header checks run before the response is written.
      expect(req.headers["x-api-key"]).toEqual("fake");
      expect(req.headers["user-agent"]).toEqual(
        `LanceDB-Node-Client/${process.env.npm_package_version}`,
      );
      replyWithNoTables(res);
    }, expectNoTables);
  });

  it("allows customizing user agent", async () => {
    const customAgent = { clientConfig: { userAgent: "MyApp/1.0" } };
    await withMockDatabase(
      (req, res) => {
        expect(req.headers["user-agent"]).toEqual("MyApp/1.0");
        replyWithNoTables(res);
      },
      expectNoTables,
      customAgent,
    );
  });
});

View File

@@ -23,6 +23,8 @@ import {
Connection as LanceDbConnection,
} from "./native.js";
import { RemoteConnection, RemoteConnectionOptions } from "./remote";
export {
WriteOptions,
WriteMode,
@@ -31,9 +33,6 @@ export {
ConnectionOptions,
IndexStatistics,
IndexConfig,
ClientConfig,
TimeoutConfig,
RetryConfig,
} from "./native.js";
export {
@@ -88,7 +87,7 @@ export * as embedding from "./embedding";
*/
export async function connect(
uri: string,
opts?: Partial<ConnectionOptions>,
opts?: Partial<ConnectionOptions | RemoteConnectionOptions>,
): Promise<Connection>;
/**
* Connect to a LanceDB instance at the given URI.
@@ -109,11 +108,13 @@ export async function connect(
* ```
*/
export async function connect(
opts: Partial<ConnectionOptions> & { uri: string },
opts: Partial<RemoteConnectionOptions | ConnectionOptions> & { uri: string },
): Promise<Connection>;
export async function connect(
uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
opts: Partial<ConnectionOptions> = {},
uriOrOptions:
| string
| (Partial<RemoteConnectionOptions | ConnectionOptions> & { uri: string }),
opts: Partial<ConnectionOptions | RemoteConnectionOptions> = {},
): Promise<Connection> {
let uri: string | undefined;
if (typeof uriOrOptions !== "string") {
@@ -128,6 +129,9 @@ export async function connect(
throw new Error("uri is required");
}
if (uri?.startsWith("db://")) {
return new RemoteConnection(uri, opts as RemoteConnectionOptions);
}
opts = (opts as ConnectionOptions) ?? {};
(<ConnectionOptions>opts).storageOptions = cleanseStorageOptions(
(<ConnectionOptions>opts).storageOptions,

View File

@@ -0,0 +1,218 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import axios, {
AxiosError,
type AxiosResponse,
type ResponseType,
} from "axios";
import { Table as ArrowTable } from "../arrow";
import { tableFromIPC } from "../arrow";
import { VectorQuery } from "../query";
/**
 * Thin HTTP client for the LanceDB Cloud REST API, built on axios.
 *
 * Requests go to `hostOverride` when one is given, otherwise to
 * `https://{dbName}.{region}.api.lancedb.com`. After `close()` has been
 * called, `get` and `post` throw "Connection is closed".
 */
export class RestfulLanceDBClient {
  #dbName: string;
  #region: string;
  #apiKey: string;
  #hostOverride?: string;
  #closed: boolean = false;
  #timeout: number = 12 * 1000; // 12 seconds
  #session?: import("axios").AxiosInstance;

  /**
   * @param dbName - database name; used in the default host name and in the
   *   `x-lancedb-database` header when a host override is set.
   * @param apiKey - API key sent with every request.
   * @param region - region used to build the default host name.
   * @param hostOverride - base URL that replaces the default host.
   * @param timeout - request timeout passed to axios; defaults to 12 seconds.
   */
  constructor(
    dbName: string,
    apiKey: string,
    region: string,
    hostOverride?: string,
    timeout?: number,
  ) {
    this.#dbName = dbName;
    this.#apiKey = apiKey;
    this.#region = region;
    this.#hostOverride = hostOverride ?? this.#hostOverride;
    this.#timeout = timeout ?? this.#timeout;
  }

  /**
   * The axios instance used for all requests. It is created on first use and
   * then reused, so every call shares one configuration.
   */
  get session(): import("axios").AxiosInstance {
    if (this.#session === undefined) {
      this.#session = axios.create({
        baseURL: this.url,
        headers: {
          // biome-ignore lint: external API
          Authorization: `Bearer ${this.#apiKey}`,
        },
        // Supplying a transform replaces axios' default JSON parsing, so text
        // bodies are parsed explicitly in `get` / `post` below.
        transformResponse: decodeErrorData,
        timeout: this.#timeout,
      });
    }
    return this.#session;
  }

  /** Base URL of the service: the host override if set, else the cloud host. */
  get url(): string {
    return (
      this.#hostOverride ??
      `https://${this.#dbName}.${this.#region}.api.lancedb.com`
    );
  }

  /** Headers attached to every request. */
  get headers(): { [key: string]: string } {
    const headers: { [key: string]: string } = {
      "x-api-key": this.#apiKey,
      "x-request-id": "na",
    };
    if (this.#region === "local") {
      headers["Host"] = `${this.#dbName}.${this.#region}.api.lancedb.com`;
    }
    if (this.#hostOverride) {
      headers["x-lancedb-database"] = this.#dbName;
    }
    return headers;
  }

  /** Whether `close()` has not been called yet. */
  isOpen(): boolean {
    return !this.#closed;
  }

  private checkNotClosed(): void {
    if (this.#closed) {
      throw new Error("Connection is closed");
    }
  }

  /** Drops the cached session and marks the client as closed. */
  close(): void {
    this.#session = undefined;
    this.#closed = true;
  }

  /**
   * Turns a response payload into a JavaScript value. Text is parsed as JSON
   * (an empty body yields `undefined`); anything else is returned unchanged.
   */
  // biome-ignore lint/suspicious/noExplicitAny: api response
  private static parseBody(data: unknown): any {
    if (typeof data !== "string") {
      return data;
    }
    return data.length === 0 ? undefined : JSON.parse(data);
  }

  /**
   * Issues a GET request and returns the decoded JSON payload.
   *
   * @param uri - path resolved against {@link url}.
   * @param params - query string parameters.
   * @throws Error when the client is closed or the status is not 200.
   */
  // biome-ignore lint/suspicious/noExplicitAny: api response
  async get(uri: string, params?: Record<string, any>): Promise<any> {
    this.checkNotClosed();
    uri = new URL(uri, this.url).toString();
    let response;
    try {
      response = await this.session.get(uri, {
        headers: this.headers,
        params,
      });
    } catch (e) {
      // An HTTP error status still carries a response; route it through
      // checkStatus so callers get a uniform error message.
      if (e instanceof AxiosError && e.response) {
        response = e.response;
      } else {
        throw e;
      }
    }

    RestfulLanceDBClient.checkStatus(response!);
    return RestfulLanceDBClient.parseBody(response!.data);
  }

  // biome-ignore lint/suspicious/noExplicitAny: api response
  async post(uri: string, body?: any): Promise<any>;
  async post(
    uri: string,
    // biome-ignore lint/suspicious/noExplicitAny: api request
    body: any,
    additional: {
      config?: { responseType: "arraybuffer" };
      headers?: Record<string, string>;
      params?: Record<string, string>;
    },
  ): Promise<Buffer>;
  /**
   * Issues a POST request.
   *
   * @param uri - path resolved against {@link url}.
   * @param body - request payload.
   * @param additional - optional response type (default "json"), extra
   *   headers (merged over the default headers) and query parameters.
   * @returns the raw payload for `responseType: "arraybuffer"`, otherwise the
   *   decoded JSON payload.
   * @throws Error when the client is closed or the status is not 200.
   */
  async post(
    uri: string,
    // biome-ignore lint/suspicious/noExplicitAny: api request
    body?: any,
    additional?: {
      config?: { responseType: ResponseType };
      headers?: Record<string, string>;
      params?: Record<string, string>;
    },
    // biome-ignore lint/suspicious/noExplicitAny: api response
  ): Promise<any> {
    this.checkNotClosed();
    uri = new URL(uri, this.url).toString();

    const responseType: ResponseType =
      additional?.config?.responseType ?? "json";
    const headers = { ...this.headers, ...additional?.headers };
    if (!headers["Content-Type"]) {
      headers["Content-Type"] = "application/json";
    }

    let response;
    try {
      response = await this.session.post(uri, body, {
        headers,
        responseType,
        // axios only serializes plain objects / URLSearchParams here; a Map
        // would be sent as an empty query string.
        params: additional?.params,
      });
    } catch (e) {
      if (e instanceof AxiosError && e.response) {
        response = e.response;
      } else {
        throw e;
      }
    }

    RestfulLanceDBClient.checkStatus(response!);
    if (responseType === "arraybuffer") {
      return response!.data;
    }
    return RestfulLanceDBClient.parseBody(response!.data);
  }

  /** Lists table names, one page at a time. */
  async listTables(limit = 10, pageToken = ""): Promise<string[]> {
    const json = await this.get("/v1/table", {
      limit,
      // biome-ignore lint/style/useNamingConvention: external API
      page_token: pageToken,
    });
    return json.tables;
  }

  /** Runs a query against a table and decodes the Arrow IPC response. */
  async query(tableName: string, query: VectorQuery): Promise<ArrowTable> {
    const tbl = await this.post(
      `/v1/table/${encodeURIComponent(tableName)}/query`,
      query,
      {
        config: {
          responseType: "arraybuffer",
        },
      },
    );
    return tableFromIPC(tbl);
  }

  /**
   * Throws a descriptive Error for every status other than 200.
   */
  static checkStatus(response: AxiosResponse): void {
    if (response.status === 404) {
      throw new Error(`Not found: ${response.data}`);
    } else if (response.status >= 400 && response.status < 500) {
      throw new Error(
        `Bad Request: ${response.status}, error: ${response.data}`,
      );
    } else if (response.status >= 500 && response.status < 600) {
      throw new Error(
        `Internal Server Error: ${response.status}, error: ${response.data}`,
      );
    } else if (response.status !== 200) {
      throw new Error(
        `Unknown Error: ${response.status}, error: ${response.data}`,
      );
    }
  }
}
/**
 * axios `transformResponse` hook that turns binary error bodies into text so
 * they can be embedded in error messages.
 *
 * axios invokes response transformers as `(data, headers, status)`. A Buffer
 * is decoded as UTF-8 only when the status denotes an error (>= 400) or is
 * unknown; successful binary responses (e.g. `responseType: "arraybuffer"`)
 * are passed through untouched so their bytes are not corrupted.
 *
 * @param data - response payload as produced by the axios adapter.
 * @param _headers - response headers (unused).
 * @param status - HTTP status code, when provided by the caller.
 * @returns the decoded string for binary error bodies, otherwise `data`.
 */
function decodeErrorData(data: unknown, _headers?: unknown, status?: number) {
  const isErrorOrUnknown = status === undefined || status >= 400;
  if (isErrorOrUnknown && Buffer.isBuffer(data)) {
    return data.toString("utf-8");
  }
  return data;
}

View File

@@ -0,0 +1,193 @@
import { Schema } from "apache-arrow";
import {
Data,
SchemaLike,
fromTableToStreamBuffer,
makeEmptyTable,
} from "../arrow";
import {
Connection,
CreateTableOptions,
OpenTableOptions,
TableNamesOptions,
} from "../connection";
import { Table } from "../table";
import { TTLCache } from "../util";
import { RestfulLanceDBClient } from "./client";
import { RemoteTable } from "./table";
/**
 * Options accepted by the RemoteConnection constructor.
 */
export interface RemoteConnectionOptions {
/**
 * API key. When omitted, the `LANCEDB_API_KEY` environment variable is used;
 * the constructor throws if neither is set.
 */
apiKey?: string;
/**
 * Region. When omitted, the `LANCEDB_REGION` environment variable is used;
 * the constructor throws if neither is set.
 */
region?: string;
/** Base URL handed to the REST client in place of the default host. */
hostOverride?: string;
/**
 * Request timeout handed to the REST client.
 * NOTE(review): presumably milliseconds (the client's own default is written
 * as `12 * 1000`) - confirm.
 */
timeout?: number;
}
/**
 * Connection to a remote database addressed by a `db://<name>` URI. All
 * operations are forwarded to the REST API through a RestfulLanceDBClient.
 */
export class RemoteConnection extends Connection {
  #dbName: string;
  #apiKey: string;
  #region: string;
  #client: RestfulLanceDBClient;
  // Names of tables known to exist; entries are created with a 300_000 TTL
  // argument so openTable can skip the describe round-trip.
  #tableCache = new TTLCache(300_000);

  /**
   * @param url - `db://<database name>` URI.
   * @param options - connection options; `apiKey` and `region` fall back to
   *   the `LANCEDB_API_KEY` / `LANCEDB_REGION` environment variables.
   * @throws Error if the API key or region is missing, or the URI does not
   *   use the `db:` protocol.
   */
  constructor(
    url: string,
    { apiKey, region, hostOverride, timeout }: RemoteConnectionOptions,
  ) {
    super();
    apiKey = apiKey ?? process.env.LANCEDB_API_KEY;
    region = region ?? process.env.LANCEDB_REGION;

    if (!apiKey) {
      throw new Error("apiKey is required when connecting to LanceDB Cloud");
    }

    if (!region) {
      throw new Error("region is required when connecting to LanceDB Cloud");
    }

    const parsed = new URL(url);
    if (parsed.protocol !== "db:") {
      throw new Error(
        `invalid protocol: ${parsed.protocol}, only accepts db://`,
      );
    }

    this.#dbName = parsed.hostname;
    this.#apiKey = apiKey;
    this.#region = region;
    this.#client = new RestfulLanceDBClient(
      this.#dbName,
      this.#apiKey,
      this.#region,
      hostOverride,
      timeout,
    );
  }

  isOpen(): boolean {
    return this.#client.isOpen();
  }

  close(): void {
    return this.#client.close();
  }

  display(): string {
    return `RemoteConnection(${this.#dbName})`;
  }

  /**
   * Lists table names and records each of them in the table cache.
   *
   * @param options - `limit` (default 10) and `startAfter` (sent as
   *   `page_token`, default "").
   */
  async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
    const response = await this.#client.get("/v1/table/", {
      limit: options?.limit ?? 10,
      // biome-ignore lint/style/useNamingConvention: external API
      page_token: options?.startAfter ?? "",
    });
    // `client.get` resolves to the response payload itself (there is no
    // `.body()` accessor); depending on the response transform it is either
    // JSON text or an already decoded object.
    const body =
      typeof response === "string" ? JSON.parse(response) : response;
    const tables: string[] = body?.tables ?? [];
    for (const table of tables) {
      this.#tableCache.set(table, true);
    }
    return tables;
  }

  /**
   * Opens a table. Unless the name is already cached, the table is described
   * first, which fails if it does not exist.
   */
  async openTable(
    name: string,
    _options?: Partial<OpenTableOptions> | undefined,
  ): Promise<Table> {
    if (this.#tableCache.get(name) === undefined) {
      await this.#client.post(
        `/v1/table/${encodeURIComponent(name)}/describe/`,
      );
      this.#tableCache.set(name, true);
    }
    return new RemoteTable(this.#client, name, this.#dbName);
  }

  /**
   * Creates a table from data. Accepts either `(name, data, options)` or a
   * single object `{ name, data, ...options }`.
   *
   * @throws Error if no data is given.
   */
  async createTable(
    nameOrOptions:
      | string
      | ({ name: string; data: Data } & Partial<CreateTableOptions>),
    data?: Data,
    options?: Partial<CreateTableOptions> | undefined,
  ): Promise<Table> {
    // Object form: unpack it and re-enter with positional arguments.
    if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
      const { name, data, ...options } = nameOrOptions;
      return this.createTable(name, data, options);
    }
    if (data === undefined) {
      throw new Error("data is required");
    }
    if (options?.mode) {
      console.warn(
        "option 'mode' is not supported in LanceDB Cloud",
        "LanceDB Cloud only supports the default 'create' mode.",
        "If the table already exists, an error will be thrown.",
      );
    }
    if (options?.embeddingFunction) {
      console.warn(
        "embedding_functions is not yet supported on LanceDB Cloud.",
        "Please vote https://github.com/lancedb/lancedb/issues/626 ",
        "for this feature.",
      );
    }

    const { buf } = await Table.parseTableData(
      data,
      options,
      true /** streaming */,
    );

    await this.#client.post(
      `/v1/table/${encodeURIComponent(nameOrOptions)}/create/`,
      buf,
      {
        config: {
          responseType: "arraybuffer",
        },
        headers: { "Content-Type": "application/vnd.apache.arrow.stream" },
      },
    );
    this.#tableCache.set(nameOrOptions, true);
    return new RemoteTable(this.#client, nameOrOptions, this.#dbName);
  }

  /** Creates a table with the given schema and no rows. */
  async createEmptyTable(
    name: string,
    schema: SchemaLike,
    options?: Partial<CreateTableOptions> | undefined,
  ): Promise<Table> {
    if (options?.mode) {
      console.warn(`mode is not supported on LanceDB Cloud`);
    }

    if (options?.embeddingFunction) {
      console.warn(
        "embeddingFunction is not yet supported on LanceDB Cloud.",
        "Please vote https://github.com/lancedb/lancedb/issues/626 ",
        "for this feature.",
      );
    }
    const emptyTable = makeEmptyTable(schema);
    const buf = await fromTableToStreamBuffer(emptyTable);

    await this.#client.post(
      `/v1/table/${encodeURIComponent(name)}/create/`,
      buf,
      {
        config: {
          responseType: "arraybuffer",
        },
        headers: { "Content-Type": "application/vnd.apache.arrow.stream" },
      },
    );

    this.#tableCache.set(name, true);
    return new RemoteTable(this.#client, name, this.#dbName);
  }

  /** Drops a table and removes it from the table cache. */
  async dropTable(name: string): Promise<void> {
    await this.#client.post(`/v1/table/${encodeURIComponent(name)}/drop/`);

    this.#tableCache.delete(name);
  }
}

View File

@@ -0,0 +1,3 @@
export { RestfulLanceDBClient } from "./client";
export { type RemoteConnectionOptions, RemoteConnection } from "./connection";
export { RemoteTable } from "./table";

View File

@@ -0,0 +1,226 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { Table as ArrowTable } from "apache-arrow";
import { Data, IntoVector } from "../arrow";
import { IndexStatistics } from "..";
import { CreateTableOptions } from "../connection";
import { IndexOptions } from "../indices";
import { MergeInsertBuilder } from "../merge";
import { VectorQuery } from "../query";
import { AddDataOptions, Table, UpdateOptions } from "../table";
import { IntoSql, toSQL } from "../util";
import { RestfulLanceDBClient } from "./client";
/**
 * Table backed by the REST API. Operations the service does not offer yet
 * fail with a "... is not yet supported on the LanceDB cloud" error.
 */
export class RemoteTable extends Table {
  #client: RestfulLanceDBClient;
  #name: string;

  // Used in the display() method
  #dbName: string;

  // Path prefix of every table endpoint, without a trailing slash; callers
  // append "/<operation>/" so the resulting URL has no empty path segment.
  get #tablePrefix() {
    return `/v1/table/${encodeURIComponent(this.#name)}`;
  }

  get name(): string {
    return this.#name;
  }

  public constructor(
    client: RestfulLanceDBClient,
    tableName: string,
    dbName: string,
  ) {
    super();
    this.#client = client;
    this.#name = tableName;
    this.#dbName = dbName;
  }

  /** True while the underlying client is open. */
  isOpen(): boolean {
    return this.#client.isOpen();
  }

  /**
   * Closes the client passed to the constructor. If that client is shared
   * with other objects, they are closed as well.
   */
  close(): void {
    this.#client.close();
  }

  display(): string {
    return `RemoteTable(${this.#dbName}; ${this.#name})`;
  }

  async schema(): Promise<import("apache-arrow").Schema> {
    const resp = await this.#client.post(`${this.#tablePrefix}/describe/`);
    // TODO: parse this into a valid arrow schema
    return resp.schema;
  }

  /** Appends data to the table as an Arrow stream. */
  async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
    const { buf, mode } = await Table.parseTableData(
      data,
      options as CreateTableOptions,
      true,
    );
    await this.#client.post(`${this.#tablePrefix}/insert/`, buf, {
      params: {
        mode,
      },
      headers: {
        "Content-Type": "application/vnd.apache.arrow.stream",
      },
    });
  }

  /**
   * Updates rows. Accepted shapes:
   * - a Map/record of column -> SQL expression, with `options.where`;
   * - `{ values, where? }` where values are converted with `toSQL`;
   * - `{ valuesSql, where? }` where values are already SQL expressions.
   */
  async update(
    optsOrUpdates:
      | (Map<string, string> | Record<string, string>)
      | ({
          values: Map<string, IntoSql> | Record<string, IntoSql>;
        } & Partial<UpdateOptions>)
      | ({
          valuesSql: Map<string, string> | Record<string, string>;
        } & Partial<UpdateOptions>),
    options?: Partial<UpdateOptions>,
  ): Promise<void> {
    // A string-valued `values` / `valuesSql` key is treated as a plain column
    // update rather than as the options-object form.
    const isValues =
      "values" in optsOrUpdates && typeof optsOrUpdates.values !== "string";
    const isValuesSql =
      "valuesSql" in optsOrUpdates &&
      typeof optsOrUpdates.valuesSql !== "string";
    const isMap = (obj: unknown): obj is Map<string, string> => {
      return obj instanceof Map;
    };

    let predicate;
    let columns: [string, string][];
    switch (true) {
      case isMap(optsOrUpdates):
        columns = Array.from(optsOrUpdates.entries());
        predicate = options?.where;
        break;
      case isValues && isMap(optsOrUpdates.values):
        columns = Array.from(optsOrUpdates.values.entries()).map(([k, v]) => [
          k,
          toSQL(v),
        ]);
        predicate = optsOrUpdates.where;
        break;
      case isValues && !isMap(optsOrUpdates.values):
        columns = Object.entries(optsOrUpdates.values).map(([k, v]) => [
          k,
          toSQL(v),
        ]);
        predicate = optsOrUpdates.where;
        break;

      case isValuesSql && isMap(optsOrUpdates.valuesSql):
        columns = Array.from(optsOrUpdates.valuesSql.entries());
        predicate = optsOrUpdates.where;
        break;
      case isValuesSql && !isMap(optsOrUpdates.valuesSql):
        columns = Object.entries(optsOrUpdates.valuesSql).map(([k, v]) => [
          k,
          v,
        ]);
        predicate = optsOrUpdates.where;
        break;
      default:
        columns = Object.entries(optsOrUpdates as Record<string, string>);
        predicate = options?.where;
    }

    await this.#client.post(`${this.#tablePrefix}/update/`, {
      predicate: predicate ?? null,
      updates: columns,
    });
  }

  async countRows(filter?: unknown): Promise<number> {
    const payload = { predicate: filter };
    return await this.#client.post(`${this.#tablePrefix}/count_rows/`, payload);
  }

  async delete(predicate: unknown): Promise<void> {
    const payload = { predicate };
    await this.#client.post(`${this.#tablePrefix}/delete/`, payload);
  }

  /**
   * Requests an index on `column`. The request always asks for index type
   * "vector" with metric "L2"; any `options` are ignored with a warning.
   */
  async createIndex(
    column: string,
    options?: Partial<IndexOptions>,
  ): Promise<void> {
    if (options !== undefined) {
      console.warn("options are not yet supported on the LanceDB cloud");
    }
    const indexType = "vector";
    const metric = "L2";
    const data = {
      column,
      // biome-ignore lint/style/useNamingConvention: external API
      index_type: indexType,
      // biome-ignore lint/style/useNamingConvention: external API
      metric_type: metric,
    };
    await this.#client.post(`${this.#tablePrefix}/create_index`, data);
  }

  query(): import("..").Query {
    throw new Error("query() is not yet supported on the LanceDB cloud");
  }

  search(_query: string | IntoVector): VectorQuery {
    throw new Error("search() is not yet supported on the LanceDB cloud");
  }

  vectorSearch(_vector: unknown): import("..").VectorQuery {
    throw new Error("vectorSearch() is not yet supported on the LanceDB cloud");
  }

  // The Promise-returning stubs below are `async` so that callers observe a
  // rejected promise (as with indexStats) instead of a synchronous throw.
  async addColumns(_newColumnTransforms: unknown): Promise<void> {
    throw new Error("addColumns() is not yet supported on the LanceDB cloud");
  }

  async alterColumns(_columnAlterations: unknown): Promise<void> {
    throw new Error("alterColumns() is not yet supported on the LanceDB cloud");
  }

  async dropColumns(_columnNames: unknown): Promise<void> {
    throw new Error("dropColumns() is not yet supported on the LanceDB cloud");
  }

  async version(): Promise<number> {
    const resp = await this.#client.post(`${this.#tablePrefix}/describe/`);
    return resp.version;
  }

  async checkout(_version: unknown): Promise<void> {
    throw new Error("checkout() is not yet supported on the LanceDB cloud");
  }

  async checkoutLatest(): Promise<void> {
    throw new Error(
      "checkoutLatest() is not yet supported on the LanceDB cloud",
    );
  }

  async restore(): Promise<void> {
    throw new Error("restore() is not yet supported on the LanceDB cloud");
  }

  async optimize(
    _options?: unknown,
  ): Promise<import("../native").OptimizeStats> {
    throw new Error("optimize() is not yet supported on the LanceDB cloud");
  }

  async listIndices(): Promise<import("../native").IndexConfig[]> {
    return await this.#client.post(`${this.#tablePrefix}/index/list/`);
  }

  async toArrow(): Promise<ArrowTable> {
    throw new Error("toArrow() is not yet supported on the LanceDB cloud");
  }

  mergeInsert(_on: string | string[]): MergeInsertBuilder {
    throw new Error("mergeInsert() is not yet supported on the LanceDB cloud");
  }

  async indexStats(_name: string): Promise<IndexStatistics | undefined> {
    throw new Error("indexStats() is not yet supported on the LanceDB cloud");
  }
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.11.1-beta.0",
"version": "0.11.0-beta.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.11.1-beta.0",
"version": "0.11.0-beta.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.11.1-beta.0",
"version": "0.11.0-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.11.1-beta.0",
"version": "0.11.0-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.11.1-beta.0",
"version": "0.11.0-beta.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.11.0",
"version": "0.10.0-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.11.0",
"version": "0.10.0-beta.1",
"cpu": [
"x64",
"arm64"
@@ -18,6 +18,7 @@
"win32"
],
"dependencies": {
"axios": "^1.7.2",
"reflect-metadata": "^0.2.2"
},
"devDependencies": {
@@ -29,7 +30,6 @@
"@napi-rs/cli": "^2.18.3",
"@types/axios": "^0.14.0",
"@types/jest": "^29.1.2",
"@types/node": "^22.7.4",
"@types/tmp": "^0.2.6",
"apache-arrow-13": "npm:apache-arrow@13.0.0",
"apache-arrow-14": "npm:apache-arrow@14.0.0",
@@ -4648,12 +4648,11 @@
"optional": true
},
"node_modules/@types/node": {
"version": "22.7.4",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.4.tgz",
"integrity": "sha512-y+NPi1rFzDs1NdQHHToqeiX2TIS79SWEAw9GYhkkx8bD0ChpfqC+n2j5OXOCpzfojBEBt6DnEnnG9MY0zk1XLg==",
"devOptional": true,
"version": "20.14.11",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.11.tgz",
"integrity": "sha512-kprQpL8MMeszbz6ojB5/tU8PLN4kesnN8Gjzw349rDlNgsSzg90lAVj3llK99Dh7JON+t9AuscPPFW6mPbTnSA==",
"dependencies": {
"undici-types": "~6.19.2"
"undici-types": "~5.26.4"
}
},
"node_modules/@types/node-fetch": {
@@ -4666,12 +4665,6 @@
"form-data": "^4.0.0"
}
},
"node_modules/@types/node/node_modules/undici-types": {
"version": "6.19.8",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
"devOptional": true
},
"node_modules/@types/pad-left": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/@types/pad-left/-/pad-left-2.1.1.tgz",
@@ -4970,21 +4963,6 @@
"arrow2csv": "bin/arrow2csv.cjs"
}
},
"node_modules/apache-arrow-15/node_modules/@types/node": {
"version": "20.16.10",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.10.tgz",
"integrity": "sha512-vQUKgWTjEIRFCvK6CyriPH3MZYiYlNy0fKiEYHWbcoWLEgs4opurGGKlebrTLqdSMIbXImH6XExNiIyNUv3WpA==",
"dev": true,
"dependencies": {
"undici-types": "~6.19.2"
}
},
"node_modules/apache-arrow-15/node_modules/undici-types": {
"version": "6.19.8",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
"dev": true
},
"node_modules/apache-arrow-16": {
"name": "apache-arrow",
"version": "16.0.0",
@@ -5006,21 +4984,6 @@
"arrow2csv": "bin/arrow2csv.cjs"
}
},
"node_modules/apache-arrow-16/node_modules/@types/node": {
"version": "20.16.10",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.10.tgz",
"integrity": "sha512-vQUKgWTjEIRFCvK6CyriPH3MZYiYlNy0fKiEYHWbcoWLEgs4opurGGKlebrTLqdSMIbXImH6XExNiIyNUv3WpA==",
"dev": true,
"dependencies": {
"undici-types": "~6.19.2"
}
},
"node_modules/apache-arrow-16/node_modules/undici-types": {
"version": "6.19.8",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
"dev": true
},
"node_modules/apache-arrow-17": {
"name": "apache-arrow",
"version": "17.0.0",
@@ -5048,42 +5011,12 @@
"integrity": "sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg==",
"dev": true
},
"node_modules/apache-arrow-17/node_modules/@types/node": {
"version": "20.16.10",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.10.tgz",
"integrity": "sha512-vQUKgWTjEIRFCvK6CyriPH3MZYiYlNy0fKiEYHWbcoWLEgs4opurGGKlebrTLqdSMIbXImH6XExNiIyNUv3WpA==",
"dev": true,
"dependencies": {
"undici-types": "~6.19.2"
}
},
"node_modules/apache-arrow-17/node_modules/flatbuffers": {
"version": "24.3.25",
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-24.3.25.tgz",
"integrity": "sha512-3HDgPbgiwWMI9zVB7VYBHaMrbOO7Gm0v+yD2FV/sCKj+9NDeVL7BOBYUuhWAQGKWOzBo8S9WdMvV0eixO233XQ==",
"dev": true
},
"node_modules/apache-arrow-17/node_modules/undici-types": {
"version": "6.19.8",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
"dev": true
},
"node_modules/apache-arrow/node_modules/@types/node": {
"version": "20.16.10",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.10.tgz",
"integrity": "sha512-vQUKgWTjEIRFCvK6CyriPH3MZYiYlNy0fKiEYHWbcoWLEgs4opurGGKlebrTLqdSMIbXImH6XExNiIyNUv3WpA==",
"peer": true,
"dependencies": {
"undici-types": "~6.19.2"
}
},
"node_modules/apache-arrow/node_modules/undici-types": {
"version": "6.19.8",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
"peer": true
},
"node_modules/argparse": {
"version": "1.0.10",
"resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
@@ -5113,14 +5046,12 @@
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
"devOptional": true
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
},
"node_modules/axios": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.7.2.tgz",
"integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==",
"dev": true,
"dependencies": {
"follow-redirects": "^1.15.6",
"form-data": "^4.0.0",
@@ -5605,7 +5536,6 @@
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"devOptional": true,
"dependencies": {
"delayed-stream": "~1.0.0"
},
@@ -5793,7 +5723,6 @@
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"devOptional": true,
"engines": {
"node": ">=0.4.0"
}
@@ -6319,7 +6248,6 @@
"version": "1.15.6",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
"dev": true,
"funding": [
{
"type": "individual",
@@ -6339,7 +6267,6 @@
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
"devOptional": true,
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
@@ -7846,7 +7773,6 @@
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"devOptional": true,
"engines": {
"node": ">= 0.6"
}
@@ -7855,7 +7781,6 @@
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"devOptional": true,
"dependencies": {
"mime-db": "1.52.0"
},
@@ -8468,8 +8393,7 @@
"node_modules/proxy-from-env": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
"dev": true
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
},
"node_modules/pump": {
"version": "3.0.0",
@@ -9637,8 +9561,7 @@
"node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"optional": true
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="
},
"node_modules/update-browserslist-db": {
"version": "1.0.13",

View File

@@ -10,7 +10,7 @@
"vector database",
"ann"
],
"version": "0.11.1-beta.0",
"version": "0.11.0-beta.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",
@@ -40,7 +40,6 @@
"@napi-rs/cli": "^2.18.3",
"@types/axios": "^0.14.0",
"@types/jest": "^29.1.2",
"@types/node": "^22.7.4",
"@types/tmp": "^0.2.6",
"apache-arrow-13": "npm:apache-arrow@13.0.0",
"apache-arrow-14": "npm:apache-arrow@14.0.0",
@@ -82,6 +81,7 @@
"version": "napi version"
},
"dependencies": {
"axios": "^1.7.2",
"reflect-metadata": "^0.2.2"
},
"optionalDependencies": {

View File

@@ -68,24 +68,6 @@ impl Connection {
builder = builder.storage_option(key, value);
}
}
let client_config = options.client_config.unwrap_or_default();
builder = builder.client_config(client_config.into());
if let Some(api_key) = options.api_key {
builder = builder.api_key(&api_key);
}
if let Some(region) = options.region {
builder = builder.region(&region);
} else {
builder = builder.region("us-east-1");
}
if let Some(host_override) = options.host_override {
builder = builder.host_override(&host_override);
}
Ok(Self::inner_new(
builder
.execute()

View File

@@ -22,7 +22,6 @@ mod index;
mod iterator;
pub mod merge;
mod query;
pub mod remote;
mod table;
mod util;
@@ -43,19 +42,6 @@ pub struct ConnectionOptions {
///
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
pub storage_options: Option<HashMap<String, String>>,
/// (For LanceDB cloud only): configuration for the remote HTTP client.
pub client_config: Option<remote::ClientConfig>,
/// (For LanceDB cloud only): the API key to use with LanceDB Cloud.
///
/// Can also be set via the environment variable `LANCEDB_API_KEY`.
pub api_key: Option<String>,
/// (For LanceDB cloud only): the region to use for LanceDB cloud.
/// Defaults to 'us-east-1'.
pub region: Option<String>,
/// (For LanceDB cloud only): the host to use for LanceDB cloud. Used
/// for testing purposes.
pub host_override: Option<String>,
}
/// Write mode for writing a table.

View File

@@ -1,120 +0,0 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use napi_derive::*;
/// Timeout configuration for remote HTTP client.
///
/// Every field is optional and expressed as a floating point number of
/// seconds. The `From` conversion in this file turns each present value into
/// a `std::time::Duration` with `Duration::from_secs_f64`, which panics for
/// negative, non-finite or overflowing inputs.
#[napi(object)]
#[derive(Debug)]
pub struct TimeoutConfig {
/// The timeout for establishing a connection in seconds. Default is 120
/// seconds (2 minutes). This can also be set via the environment variable
/// `LANCE_CLIENT_CONNECT_TIMEOUT`, as an integer number of seconds.
pub connect_timeout: Option<f64>,
/// The timeout for reading data from the server in seconds. Default is 300
/// seconds (5 minutes). This can also be set via the environment variable
/// `LANCE_CLIENT_READ_TIMEOUT`, as an integer number of seconds.
pub read_timeout: Option<f64>,
/// The timeout for keeping idle connections in the connection pool in seconds.
/// Default is 300 seconds (5 minutes). This can also be set via the
/// environment variable `LANCE_CLIENT_CONNECTION_TIMEOUT`, as an integer
/// number of seconds.
pub pool_idle_timeout: Option<f64>,
}
/// Retry configuration for the remote HTTP client.
///
/// Every field is optional. The `From` conversion in this file copies the
/// fields one-to-one into `lancedb::remote::RetryConfig`, narrowing
/// `backoff_factor` and `backoff_jitter` from `f64` to `f32`.
#[napi(object)]
#[derive(Debug)]
pub struct RetryConfig {
/// The maximum number of retries for a request. Default is 3. You can also
/// set this via the environment variable `LANCE_CLIENT_MAX_RETRIES`.
pub retries: Option<u8>,
/// The maximum number of retries for connection errors. Default is 3. You
/// can also set this via the environment variable `LANCE_CLIENT_CONNECT_RETRIES`.
pub connect_retries: Option<u8>,
/// The maximum number of retries for read errors. Default is 3. You can also
/// set this via the environment variable `LANCE_CLIENT_READ_RETRIES`.
pub read_retries: Option<u8>,
/// The backoff factor to apply between retries. Default is 0.25. Between each retry
/// the client will wait for the amount of seconds:
/// `{backoff factor} * (2 ** ({number of previous retries}))`. So for the default
/// of 0.25, the first retry will wait 0.25 seconds, the second retry will wait 0.5
/// seconds, the third retry will wait 1 second, etc.
///
/// You can also set this via the environment variable
/// `LANCE_CLIENT_RETRY_BACKOFF_FACTOR`.
pub backoff_factor: Option<f64>,
/// The jitter to apply to the backoff factor, in seconds. Default is 0.25.
///
/// A random value between 0 and `backoff_jitter` will be added to the backoff
/// factor in seconds. So for the default of 0.25 seconds, between 0 and 250
/// milliseconds will be added to the sleep between each retry.
///
/// You can also set this via the environment variable
/// `LANCE_CLIENT_RETRY_BACKOFF_JITTER`.
pub backoff_jitter: Option<f64>,
/// The HTTP status codes for which to retry the request. Default is
/// [429, 500, 502, 503].
///
/// You can also set this via the environment variable
/// `LANCE_CLIENT_RETRY_STATUSES`. Use a comma-separated list of integers.
pub statuses: Option<Vec<u16>>,
}
/// Top-level configuration for the remote HTTP client.
///
/// All fields are optional; see each field for what happens when it is omitted.
#[napi(object)]
#[derive(Debug, Default)]
pub struct ClientConfig {
/// The user agent string for the client. When omitted, it defaults to
/// `LanceDB-Node-Client/<version>`, where `<version>` is this crate's
/// package version.
pub user_agent: Option<String>,
/// Retry behaviour. When omitted, the default retry configuration of
/// `lancedb::remote::RetryConfig` is used.
pub retry_config: Option<RetryConfig>,
/// Timeout behaviour. When omitted, the default timeout configuration of
/// `lancedb::remote::TimeoutConfig` is used.
pub timeout_config: Option<TimeoutConfig>,
}
impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
    /// Converts the JavaScript-facing timeout options into the core client's
    /// timeout configuration.
    ///
    /// Each field is a number of (possibly fractional) seconds and is turned
    /// into a [`std::time::Duration`]. A value that cannot be represented as a
    /// `Duration` (negative, NaN, infinite, or too large) is converted to
    /// `None`, exactly as if the field had not been provided. This conversion
    /// never panics.
    fn from(config: TimeoutConfig) -> Self {
        // `Duration::from_secs_f64` panics on negative, non-finite or
        // overflowing input. These numbers come straight from user-supplied
        // JavaScript values, so use the checked constructor instead.
        fn to_duration(secs: Option<f64>) -> Option<std::time::Duration> {
            secs.and_then(|s| std::time::Duration::try_from_secs_f64(s).ok())
        }

        Self {
            connect_timeout: to_duration(config.connect_timeout),
            read_timeout: to_duration(config.read_timeout),
            pool_idle_timeout: to_duration(config.pool_idle_timeout),
        }
    }
}
impl From<RetryConfig> for lancedb::remote::RetryConfig {
    /// Converts the JavaScript-facing retry options into the core client's
    /// retry configuration.
    ///
    /// Counters and status codes are passed through unchanged. The two
    /// backoff settings arrive as `f64` and are narrowed to `f32`.
    fn from(config: RetryConfig) -> Self {
        // Narrow the floating-point settings first; everything else is moved
        // across as-is.
        let backoff_factor = config.backoff_factor.map(|factor| factor as f32);
        let backoff_jitter = config.backoff_jitter.map(|jitter| jitter as f32);

        Self {
            retries: config.retries,
            connect_retries: config.connect_retries,
            read_retries: config.read_retries,
            backoff_factor,
            backoff_jitter,
            statuses: config.statuses,
        }
    }
}
impl From<ClientConfig> for lancedb::remote::ClientConfig {
    /// Converts the JavaScript-facing client options into the core client's
    /// configuration.
    ///
    /// * `user_agent` falls back to `LanceDB-Node-Client/<crate version>` when
    ///   not provided.
    /// * `retry_config` and `timeout_config` are converted via their own
    ///   `From` implementations, or replaced by the target type's `Default`
    ///   when not provided.
    fn from(config: ClientConfig) -> Self {
        Self {
            // Build the default string lazily so that no allocation happens
            // when the caller supplied their own user agent.
            user_agent: config.user_agent.unwrap_or_else(|| {
                concat!("LanceDB-Node-Client/", env!("CARGO_PKG_VERSION")).to_string()
            }),
            retry_config: config.retry_config.map(Into::into).unwrap_or_default(),
            timeout_config: config.timeout_config.map(Into::into).unwrap_or_default(),
        }
    }
}

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.14.1-beta.1"
current_version = "0.14.0-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.14.1-beta.1"
version = "0.14.0-beta.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -3,8 +3,9 @@ name = "lancedb"
# version in Cargo.toml
dependencies = [
"deprecation",
"pylance==0.18.3-beta.2",
"pylance==0.18.0",
"requests>=2.31.0",
"retry>=0.9.2",
"tqdm>=4.27.0",
"pydantic>=1.10",
"attrs>=21.3.0",

View File

@@ -20,7 +20,7 @@ from .util import safe_import_pandas
pd = safe_import_pandas()
DATA = Union[List[dict], "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
URI = Union[str, Path]
VECTOR_COLUMN_NAME = "vector"

View File

@@ -96,7 +96,7 @@ class DBConnection(EnforceOverrides):
User must provide at least one of `data` or `schema`.
Acceptable types are:
- list-of-dict
- dict or list-of-dict
- pandas.DataFrame
@@ -579,7 +579,7 @@ class AsyncConnection(object):
User must provide at least one of `data` or `schema`.
Acceptable types are:
- list-of-dict
- dict or list-of-dict
- pandas.DataFrame

View File

@@ -21,35 +21,14 @@ import time
import urllib.error
import weakref
import logging
from functools import wraps
from typing import Callable, List, Union
import numpy as np
import pyarrow as pa
from lance.vector import vec_to_table
from retry import retry
from ..util import deprecated, safe_import_pandas
# ruff: noqa: PERF203
def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
def wrapper(fn):
@wraps(fn)
def wrapped(*args, **kwargs):
for i in range(tries):
try:
return fn(*args, **kwargs)
except Exception:
if i + 1 == tries:
raise
else:
sleep = min(delay * (backoff**i) + jitter, max_delay)
time.sleep(sleep)
return wrapped
return wrapper
pd = safe_import_pandas()
DATA = Union[pa.Table, "pd.DataFrame"]

View File

@@ -88,11 +88,6 @@ class Query(pydantic.BaseModel):
tuning advice.
offset: int
The offset to start fetching results from
fast_search: bool
Skip a flat search of unindexed data. This will improve
search performance but search results will not include unindexed data.
- *default False*.
"""
vector_column: Optional[str] = None
@@ -129,8 +124,6 @@ class Query(pydantic.BaseModel):
offset: int = 0
fast_search: bool = False
class LanceQueryBuilder(ABC):
"""An abstract query builder. Subclasses are defined for vector search,
@@ -146,7 +139,6 @@ class LanceQueryBuilder(ABC):
vector_column_name: str,
ordering_field_name: Optional[str] = None,
fts_columns: Union[str, List[str]] = [],
fast_search: bool = False,
) -> LanceQueryBuilder:
"""
Create a query builder based on the given query and query type.
@@ -163,8 +155,6 @@ class LanceQueryBuilder(ABC):
If "auto", the query type is inferred based on the query.
vector_column_name: str
The name of the vector column to use for vector search.
fast_search: bool
Skip flat search of unindexed data.
"""
# Check hybrid search first as it supports empty query pattern
if query_type == "hybrid":
@@ -206,9 +196,7 @@ class LanceQueryBuilder(ABC):
else:
raise TypeError(f"Unsupported query type: {type(query)}")
return LanceVectorQueryBuilder(
table, query, vector_column_name, str_query, fast_search
)
return LanceVectorQueryBuilder(table, query, vector_column_name, str_query)
@classmethod
def _resolve_query(cls, table, query, query_type, vector_column_name):
@@ -577,7 +565,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
query: Union[np.ndarray, list, "PIL.Image.Image"],
vector_column: str,
str_query: Optional[str] = None,
fast_search: bool = False,
):
super().__init__(table)
self._query = query
@@ -588,7 +575,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._prefilter = False
self._reranker = None
self._str_query = str_query
self._fast_search = fast_search
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
"""Set the distance metric to use.
@@ -689,7 +675,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
vector_column=self._vector_column,
with_row_id=self._with_row_id,
offset=self._offset,
fast_search=self._fast_search,
)
result_set = self._table._execute_query(query, batch_size)
if self._reranker is not None:

View File

@@ -50,8 +50,6 @@ class VectorQuery(BaseModel):
vector_column: str = VECTOR_COLUMN_NAME
fast_search: bool = False
@attrs.define
class VectorQueryResult:

View File

@@ -103,29 +103,19 @@ class RestfulLanceDBClient:
@staticmethod
def _check_status(resp: requests.Response):
# Leaving request id empty for now, as we'll be replacing this impl
# with the Rust one shortly.
if resp.status_code == 404:
raise LanceDBClientError(
f"Not found: {resp.text}", request_id="", status_code=404
)
raise LanceDBClientError(f"Not found: {resp.text}")
elif 400 <= resp.status_code < 500:
raise LanceDBClientError(
f"Bad Request: {resp.status_code}, error: {resp.text}",
request_id="",
status_code=resp.status_code,
f"Bad Request: {resp.status_code}, error: {resp.text}"
)
elif 500 <= resp.status_code < 600:
raise LanceDBClientError(
f"Internal Server Error: {resp.status_code}, error: {resp.text}",
request_id="",
status_code=resp.status_code,
f"Internal Server Error: {resp.status_code}, error: {resp.text}"
)
elif resp.status_code != 200:
raise LanceDBClientError(
f"Unknown Error: {resp.status_code}, error: {resp.text}",
request_id="",
status_code=resp.status_code,
f"Unknown Error: {resp.status_code}, error: {resp.text}"
)
@_check_not_closed

View File

@@ -12,102 +12,5 @@
# limitations under the License.
from typing import Optional
class LanceDBClientError(RuntimeError):
"""An error that occurred in the LanceDB client.
Attributes
----------
message: str
The error message.
request_id: str
The id of the request that failed. This can be provided in error reports
to help diagnose the issue.
status_code: int
The HTTP status code of the response. May be None if the request
failed before the response was received.
"""
def __init__(
self, message: str, request_id: str, status_code: Optional[int] = None
):
super().__init__(message)
self.request_id = request_id
self.status_code = status_code
class HttpError(LanceDBClientError):
"""An error that occurred during an HTTP request.
Attributes
----------
message: str
The error message.
request_id: str
The id of the request that failed. This can be provided in error reports
to help diagnose the issue.
status_code: int
The HTTP status code of the response. May be None if the request
failed before the response was received.
"""
pass
class RetryError(LanceDBClientError):
"""An error that occurs when the client has exceeded the maximum number of retries.
The retry strategy can be adjusted by setting the
[retry_config](lancedb.remote.ClientConfig.retry_config) in the client
configuration. This is passed in the `client_config` argument of
[connect](lancedb.connect) and [connect_async](lancedb.connect_async).
The __cause__ attribute of this exception will be the last exception that
caused the retry to fail. It will be an
[HttpError][lancedb.remote.errors.HttpError] instance.
Attributes
----------
message: str
The retry error message, which will describe which retry limit was hit.
request_id: str
The id of the request that failed. This can be provided in error reports
to help diagnose the issue.
request_failures: int
The number of request failures.
connect_failures: int
The number of connect failures.
read_failures: int
The number of read failures.
max_request_failures: int
The maximum number of request failures.
max_connect_failures: int
The maximum number of connect failures.
max_read_failures: int
The maximum number of read failures.
status_code: int
The HTTP status code of the last response. May be None if the request
failed before the response was received.
"""
def __init__(
self,
message: str,
request_id: str,
request_failures: int,
connect_failures: int,
read_failures: int,
max_request_failures: int,
max_connect_failures: int,
max_read_failures: int,
status_code: Optional[int],
):
super().__init__(message, request_id, status_code)
self.request_failures = request_failures
self.connect_failures = connect_failures
self.read_failures = read_failures
self.max_request_failures = max_request_failures
self.max_connect_failures = max_connect_failures
self.max_read_failures = max_read_failures

View File

@@ -270,7 +270,6 @@ class RemoteTable(Table):
vector_column_name: Optional[str] = None,
query_type="auto",
fts_columns: Optional[Union[str, List[str]]] = None,
fast_search: bool = False,
) -> LanceVectorQueryBuilder:
"""Create a search query to find the nearest neighbors
of the given query vector. We currently support [vector search][search]
@@ -315,12 +314,6 @@ class RemoteTable(Table):
- If the table has multiple vector columns then the *vector_column_name*
needs to be specified. Otherwise, an error is raised.
fast_search: bool, optional
Skip a flat search of unindexed data. This may improve
search performance but search results will not include unindexed data.
- *default False*.
Returns
-------
LanceQueryBuilder
@@ -350,7 +343,6 @@ class RemoteTable(Table):
query_type,
vector_column_name=vector_column_name,
fts_columns=fts_columns,
fast_search=fast_search,
)
def _execute_query(

View File

@@ -31,6 +31,7 @@ import pyarrow.compute as pc
import pyarrow.fs as pa_fs
from lance import LanceDataset
from lance.dependencies import _check_for_hugging_face
from lance.vector import vec_to_table
from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
@@ -86,9 +87,6 @@ def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
if isinstance(data, LanceModel):
raise ValueError("Cannot add a single LanceModel to a table. Use a list.")
if isinstance(data, dict):
raise ValueError("Cannot add a single dictionary to a table. Use a list.")
if isinstance(data, list):
# convert to list of dict if data is a bunch of LanceModels
if isinstance(data[0], LanceModel):
@@ -100,6 +98,8 @@ def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
return pa.Table.from_batches(data, schema=schema)
else:
return pa.Table.from_pylist(data)
elif isinstance(data, dict):
return vec_to_table(data)
elif _check_for_pandas(data) and isinstance(data, pd.DataFrame):
# Do not add schema here, since schema may contains the vector column
table = pa.Table.from_pandas(data, preserve_index=False)
@@ -554,7 +554,7 @@ class Table(ABC):
data: DATA
The data to insert into the table. Acceptable types are:
- list-of-dict
- dict or list-of-dict
- pandas.DataFrame
@@ -1409,7 +1409,7 @@ class LanceTable(Table):
Parameters
----------
data: list-of-dict, pd.DataFrame
data: list-of-dict, dict, pd.DataFrame
The data to insert into the table.
mode: str
The mode to use when writing the data. Valid values are
@@ -2348,7 +2348,7 @@ class AsyncTable:
data: DATA
The data to insert into the table. Acceptable types are:
- list-of-dict
- dict or list-of-dict
- pandas.DataFrame

View File

@@ -354,7 +354,7 @@ async def test_create_mode_async(tmp_path):
)
await db.create_table("test", data=data)
with pytest.raises(ValueError, match="already exists"):
with pytest.raises(RuntimeError):
await db.create_table("test", data=data)
new_data = pd.DataFrame(
@@ -382,7 +382,7 @@ async def test_create_exist_ok_async(tmp_path):
)
tbl = await db.create_table("test", data=data)
with pytest.raises(ValueError, match="already exists"):
with pytest.raises(RuntimeError):
await db.create_table("test", data=data)
# open the table but don't add more rows

View File

@@ -11,7 +11,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Union
from unittest.mock import MagicMock, patch
import lance
import lancedb
@@ -26,7 +25,6 @@ from lancedb.embeddings import (
)
from lancedb.embeddings.base import TextEmbeddingFunction
from lancedb.embeddings.registry import get_registry, register
from lancedb.embeddings.utils import retry
from lancedb.pydantic import LanceModel, Vector
@@ -227,12 +225,3 @@ def test_embedding_function_safe_model_dump(embedding_type):
f"{embedding_type}: Private attribute '{key}' "
f"is present in dumped model"
)
@patch("time.sleep")
def test_retry(mock_sleep):
test_function = MagicMock(side_effect=[Exception] * 9 + ["result"])
test_function = retry()(test_function)
result = test_function()
assert mock_sleep.call_count == 9
assert result == "result"

View File

@@ -1,14 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import contextlib
import http.server
import threading
from unittest.mock import MagicMock
import uuid
import lancedb
from lancedb.remote.errors import HttpError, RetryError
import pyarrow as pa
from lancedb.remote.client import VectorQuery, VectorQueryResult
import pytest
@@ -100,33 +98,6 @@ def make_mock_http_handler(handler):
return MockLanceDBHandler
@contextlib.asynccontextmanager
async def mock_lancedb_connection(handler):
with http.server.HTTPServer(
("localhost", 8080), make_mock_http_handler(handler)
) as server:
handle = threading.Thread(target=server.serve_forever)
handle.start()
db = await lancedb.connect_async(
"db://dev",
api_key="fake",
host_override="http://localhost:8080",
client_config={
"retry_config": {"retries": 2},
"timeout_config": {
"connect_timeout": 1,
},
},
)
try:
yield db
finally:
server.shutdown()
handle.join()
@pytest.mark.asyncio
async def test_async_remote_db():
def handler(request):
@@ -143,50 +114,28 @@ async def test_async_remote_db():
request.end_headers()
request.wfile.write(b'{"tables": []}')
async with mock_lancedb_connection(handler) as db:
table_names = await db.table_names()
assert table_names == []
def run_server():
with http.server.HTTPServer(
("localhost", 8080), make_mock_http_handler(handler)
) as server:
# we will only make one request
server.handle_request()
handle = threading.Thread(target=run_server)
handle.start()
@pytest.mark.asyncio
async def test_http_error():
request_id_holder = {"request_id": None}
db = await lancedb.connect_async(
"db://dev",
api_key="fake",
host_override="http://localhost:8080",
client_config={
"retry_config": {"retries": 2},
"timeout_config": {
"connect_timeout": 1,
},
},
)
table_names = await db.table_names()
assert table_names == []
def handler(request):
request_id_holder["request_id"] = request.headers["x-request-id"]
request.send_response(507)
request.end_headers()
request.wfile.write(b"Internal Server Error")
async with mock_lancedb_connection(handler) as db:
with pytest.raises(HttpError, match="Internal Server Error") as exc_info:
await db.table_names()
assert exc_info.value.request_id == request_id_holder["request_id"]
assert exc_info.value.status_code == 507
@pytest.mark.asyncio
async def test_retry_error():
request_id_holder = {"request_id": None}
def handler(request):
request_id_holder["request_id"] = request.headers["x-request-id"]
request.send_response(429)
request.end_headers()
request.wfile.write(b"Try again later")
async with mock_lancedb_connection(handler) as db:
with pytest.raises(RetryError, match="Hit retry limit") as exc_info:
await db.table_names()
assert exc_info.value.request_id == request_id_holder["request_id"]
assert exc_info.value.status_code == 429
cause = exc_info.value.__cause__
assert isinstance(cause, HttpError)
assert "Try again later" in str(cause)
assert cause.request_id == request_id_holder["request_id"]
assert cause.status_code == 429
handle.join()

View File

@@ -193,24 +193,6 @@ def test_empty_table(db):
tbl.add(data=data)
def test_add_dictionary(db):
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), 2)),
pa.field("item", pa.string()),
pa.field("price", pa.float32()),
]
)
tbl = LanceTable.create(db, "test", schema=schema)
data = {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}
with pytest.raises(ValueError) as excep_info:
tbl.add(data=data)
assert (
str(excep_info.value)
== "Cannot add a single dictionary to a table. Use a list."
)
def test_add(db):
schema = pa.schema(
[

View File

@@ -14,9 +14,7 @@
use pyo3::{
exceptions::{PyIOError, PyNotImplementedError, PyOSError, PyRuntimeError, PyValueError},
intern,
types::{PyAnyMethods, PyNone},
PyErr, PyResult, Python,
PyResult,
};
use lancedb::error::Error as LanceError;
@@ -40,79 +38,12 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
LanceError::InvalidInput { .. }
| LanceError::InvalidTableName { .. }
| LanceError::TableNotFound { .. }
| LanceError::Schema { .. }
| LanceError::TableAlreadyExists { .. } => self.value_error(),
| LanceError::Schema { .. } => self.value_error(),
LanceError::CreateDir { .. } => self.os_error(),
LanceError::ObjectStore { .. } => Err(PyIOError::new_err(err.to_string())),
LanceError::NotSupported { .. } => {
Err(PyNotImplementedError::new_err(err.to_string()))
}
LanceError::Http {
request_id,
source,
status_code,
} => Python::with_gil(|py| {
let message = err.to_string();
let http_err_cls = py
.import_bound(intern!(py, "lancedb.remote.errors"))?
.getattr(intern!(py, "HttpError"))?;
let err = http_err_cls.call1((
message,
request_id,
status_code.map(|s| s.as_u16()),
))?;
if let Some(cause) = source.source() {
// The HTTP error already includes the first cause. But
// we can add the rest of the chain if there is any more.
let cause_err = http_from_rust_error(
py,
cause,
request_id,
status_code.map(|s| s.as_u16()),
)?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
}
Err(PyErr::from_value_bound(err))
}),
LanceError::Retry {
request_id,
request_failures,
max_request_failures,
connect_failures,
max_connect_failures,
read_failures,
max_read_failures,
source,
status_code,
} => Python::with_gil(|py| {
let cause_err = http_from_rust_error(
py,
source.as_ref(),
request_id,
status_code.map(|s| s.as_u16()),
)?;
let message = err.to_string();
let retry_error_cls = py
.import_bound(intern!(py, "lancedb.remote.errors"))?
.getattr("RetryError")?;
let err = retry_error_cls.call1((
message,
request_id,
*request_failures,
*connect_failures,
*read_failures,
*max_request_failures,
*max_connect_failures,
*max_read_failures,
status_code.map(|s| s.as_u16()),
))?;
err.setattr(intern!(py, "__cause__"), cause_err)?;
Err(PyErr::from_value_bound(err))
}),
_ => self.runtime_error(),
},
}
@@ -130,24 +61,3 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
self.map_err(|err| PyValueError::new_err(err.to_string()))
}
}
/// Builds a Python `lancedb.remote.errors.HttpError` from a Rust error.
///
/// The Python exception is constructed from the error's display string
/// together with `request_id` and `status_code`. If `err` has a `source()`,
/// that source is converted recursively (with the same `request_id` and
/// `status_code`) and attached as the exception's `__cause__`, so the whole
/// Rust error chain is mirrored on the Python side.
///
/// Returns the resulting `PyErr`; fails if the `lancedb.remote.errors`
/// module cannot be imported or any of the Python calls raise.
fn http_from_rust_error(
py: Python<'_>,
err: &dyn std::error::Error,
request_id: &str,
status_code: Option<u16>,
) -> PyResult<PyErr> {
let message = err.to_string();
let http_err_cls = py.import("lancedb.remote.errors")?.getattr("HttpError")?;
let py_err = http_err_cls.call1((message, request_id, status_code))?;
// Reset the traceback since it doesn't provide additional information.
let py_err = py_err.call_method1(intern!(py, "with_traceback"), (PyNone::get_bound(py),))?;
if let Some(cause) = err.source() {
// Walk down the Rust error chain, linking each level via `__cause__`.
let cause_err = http_from_rust_error(py, cause, request_id, status_code)?;
py_err.setattr(intern!(py, "__cause__"), cause_err)?;
}
Ok(PyErr::from_value(py_err))
}

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.11.1-beta.0"
version = "0.11.0-beta.1"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.11.1-beta.0"
version = "0.11.0-beta.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -196,6 +196,22 @@ impl<T: IntoArrow> CreateTableBuilder<true, T> {
};
Ok((data, builder))
}
/// Add an embedding definition to the table.
///
/// The definition's `embedding_name` must match the name of an embedding
/// function that is present in the connection's embedding registry.
///
/// # Errors
///
/// Returns `Error::EmbeddingFunctionNotFound` if no function with that name
/// is found in the registry.
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
// Early verification of the embedding name
let embedding_func = self
.parent
.embedding_registry()
.get(&definition.embedding_name)
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
name: definition.embedding_name.clone(),
reason: "No embedding function found in the connection's embedding_registry"
.to_string(),
})?;
// Keep the definition together with the function that was looked up for it.
self.embeddings.push((definition, embedding_func));
Ok(self)
}
}
// Builder methods that only apply when we do not have initial data
@@ -313,26 +329,6 @@ impl<const HAS_DATA: bool, T: IntoArrow> CreateTableBuilder<HAS_DATA, T> {
};
self
}
/// Add an embedding definition to the table.
///
/// The `embedding_name` must match the name of an embedding function that
/// was previously registered with the connection's [`EmbeddingRegistry`].
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
// Early verification of the embedding name
let embedding_func = self
.parent
.embedding_registry()
.get(&definition.embedding_name)
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
name: definition.embedding_name.clone(),
reason: "No embedding function found in the connection's embedding_registry"
.to_string(),
})?;
self.embeddings.push((definition, embedding_func));
Ok(self)
}
}
#[derive(Clone, Debug)]

View File

@@ -46,37 +46,8 @@ pub enum Error {
ObjectStore { source: object_store::Error },
#[snafu(display("lance error: {source}"))]
Lance { source: lance::Error },
#[cfg(feature = "remote")]
#[snafu(display("Http error: (request_id={request_id}) {source}"))]
Http {
#[snafu(source(from(reqwest::Error, Box::new)))]
source: Box<dyn std::error::Error + Send + Sync>,
request_id: String,
/// Status code associated with the error, if available.
/// This is not always available, for example when the error is due to a
/// connection failure. It may also be missing if the request was
/// successful but there was an error decoding the response.
status_code: Option<reqwest::StatusCode>,
},
#[cfg(feature = "remote")]
#[snafu(display(
"Hit retry limit for request_id={request_id} (\
request_failures={request_failures}/{max_request_failures}, \
connect_failures={connect_failures}/{max_connect_failures}, \
read_failures={read_failures}/{max_read_failures})"
))]
Retry {
request_id: String,
request_failures: u8,
max_request_failures: u8,
connect_failures: u8,
max_connect_failures: u8,
read_failures: u8,
max_read_failures: u8,
#[snafu(source(from(reqwest::Error, Box::new)))]
source: Box<dyn std::error::Error + Send + Sync>,
status_code: Option<reqwest::StatusCode>,
},
#[snafu(display("Http error: {message}"))]
Http { message: String },
#[snafu(display("Arrow error: {source}"))]
Arrow { source: ArrowError },
#[snafu(display("LanceDBError: not supported: {message}"))]
@@ -127,6 +98,24 @@ impl<T> From<PoisonError<T>> for Error {
}
}
#[cfg(feature = "remote")]
impl From<reqwest::Error> for Error {
    /// Wraps a `reqwest` failure as `Error::Http`, keeping only its display
    /// text as the message.
    fn from(e: reqwest::Error) -> Self {
        let message = e.to_string();
        Self::Http { message }
    }
}
#[cfg(feature = "remote")]
impl From<url::ParseError> for Error {
    /// Wraps a URL parsing failure as `Error::Http`, keeping only its display
    /// text as the message.
    fn from(e: url::ParseError) -> Self {
        let message = e.to_string();
        Self::Http { message }
    }
}
#[cfg(feature = "polars")]
impl From<polars::prelude::PolarsError> for Error {
fn from(source: polars::prelude::PolarsError) -> Self {

View File

@@ -144,7 +144,7 @@ impl std::str::FromStr for IndexType {
"BTREE" => Ok(Self::BTree),
"BITMAP" => Ok(Self::Bitmap),
"LABEL_LIST" | "LABELLIST" => Ok(Self::LabelList),
"FTS" | "INVERTED" => Ok(Self::FTS),
"FTS" => Ok(Self::FTS),
"IVF_PQ" => Ok(Self::IvfPq),
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
"IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),

View File

@@ -216,12 +216,10 @@ impl RestfulLanceDbClient<Sender> {
host_override: Option<String>,
client_config: ClientConfig,
) -> Result<Self> {
let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
})?;
let parsed_url = url::Url::parse(db_url)?;
debug_assert_eq!(parsed_url.scheme(), "db");
if !parsed_url.has_host() {
return Err(Error::InvalidInput {
return Err(Error::Http {
message: format!("Invalid database URL (missing host) '{}'", db_url),
});
}
@@ -257,11 +255,7 @@ impl RestfulLanceDbClient<Sender> {
host_override.is_some(),
)?)
.user_agent(client_config.user_agent)
.build()
.map_err(|err| Error::Other {
message: "Failed to build HTTP client".into(),
source: Some(Box::new(err)),
})?;
.build()?;
let host = match host_override {
Some(host_override) => host_override,
None => format!("https://{}.{}.api.lancedb.com", db_name, region),
@@ -290,7 +284,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
let mut headers = HeaderMap::new();
headers.insert(
"x-api-key",
HeaderValue::from_str(api_key).map_err(|_| Error::InvalidInput {
HeaderValue::from_str(api_key).map_err(|_| Error::Http {
message: "non-ascii api key provided".to_string(),
})?,
);
@@ -298,7 +292,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
let host = format!("{}.local.api.lancedb.com", db_name);
headers.insert(
"Host",
HeaderValue::from_str(&host).map_err(|_| Error::InvalidInput {
HeaderValue::from_str(&host).map_err(|_| Error::Http {
message: format!("non-ascii database name '{}' provided", db_name),
})?,
);
@@ -306,7 +300,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
if has_host_override {
headers.insert(
"x-lancedb-database",
HeaderValue::from_str(db_name).map_err(|_| Error::InvalidInput {
HeaderValue::from_str(db_name).map_err(|_| Error::Http {
message: format!("non-ascii database name '{}' provided", db_name),
})?,
);
@@ -325,30 +319,22 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
self.client.post(full_uri)
}
pub async fn send(&self, req: RequestBuilder, with_retry: bool) -> Result<(String, Response)> {
pub async fn send(&self, req: RequestBuilder, with_retry: bool) -> Result<Response> {
let (client, request) = req.build_split();
let mut request = request.unwrap();
// Set a request id.
// TODO: allow the user to supply this, through middleware?
let request_id = if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) {
request_id.to_str().unwrap().to_string()
} else {
let request_id = uuid::Uuid::new_v4().to_string();
let header = HeaderValue::from_str(&request_id).unwrap();
request.headers_mut().insert(REQUEST_ID_HEADER, header);
request_id
};
if request.headers().get(REQUEST_ID_HEADER).is_none() {
let request_id = uuid::Uuid::new_v4();
let request_id = HeaderValue::from_str(&request_id.to_string()).unwrap();
request.headers_mut().insert(REQUEST_ID_HEADER, request_id);
}
if with_retry {
self.send_with_retry_impl(client, request, request_id).await
self.send_with_retry_impl(client, request).await
} else {
let response = self
.sender
.send(&client, request)
.await
.err_to_http(request_id.clone())?;
Ok((request_id, response))
Ok(self.sender.send(&client, request).await?)
}
}
@@ -356,180 +342,100 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
&self,
client: reqwest::Client,
req: Request,
request_id: String,
) -> Result<(String, Response)> {
let mut retry_counter = RetryCounter::new(&self.retry_config, request_id);
) -> Result<Response> {
let mut request_failures = 0;
let mut connect_failures = 0;
let mut read_failures = 0;
loop {
// This only works if the request body is not a stream. If it is
// a stream, we can't use the retry path. We would need to implement
// an outer retry.
let request = req.try_clone().ok_or_else(|| Error::Runtime {
let request = req.try_clone().ok_or_else(|| Error::Http {
message: "Attempted to retry a request that cannot be cloned".to_string(),
})?;
let response = self
.sender
.send(&client, request)
.await
.map(|r| (r.status(), r));
match response {
Ok((status, response)) if status.is_success() => {
return Ok((retry_counter.request_id, response))
}
Ok((status, response)) if self.retry_config.statuses.contains(&status) => {
let source = self
.check_response(&retry_counter.request_id, response)
.await
.unwrap_err();
retry_counter.increment_request_failures(source)?;
let response = self.sender.send(&client, request).await;
let status_code = response.as_ref().map(|r| r.status());
match status_code {
Ok(status) if status.is_success() => return Ok(response?),
Ok(status) if self.retry_config.statuses.contains(&status) => {
request_failures += 1;
if request_failures >= self.retry_config.retries {
// TODO: better error
return Err(Error::Runtime {
message: format!(
"Request failed after {} retries with status code {}",
request_failures, status
),
});
}
}
Err(err) if err.is_connect() => {
retry_counter.increment_connect_failures(err)?;
connect_failures += 1;
if connect_failures >= self.retry_config.connect_retries {
return Err(Error::Runtime {
message: format!(
"Request failed after {} connect retries with error: {}",
connect_failures, err
),
});
}
}
Err(err) if err.is_timeout() || err.is_body() || err.is_decode() => {
retry_counter.increment_read_failures(err)?;
read_failures += 1;
if read_failures >= self.retry_config.read_retries {
return Err(Error::Runtime {
message: format!(
"Request failed after {} read retries with error: {}",
read_failures, err
),
});
}
}
Err(err) => {
let status_code = err.status();
return Err(Error::Http {
source: Box::new(err),
request_id: retry_counter.request_id,
status_code,
});
}
Ok((_, response)) => return Ok((retry_counter.request_id, response)),
Ok(_) | Err(_) => return Ok(response?),
}
let sleep_time = retry_counter.next_sleep_time();
let backoff = self.retry_config.backoff_factor * (2.0f32.powi(request_failures as i32));
let jitter = rand::random::<f32>() * self.retry_config.backoff_jitter;
let sleep_time = Duration::from_secs_f32(backoff + jitter);
debug!(
"Retrying request {:?} ({}/{} connect, {}/{} read, {}/{} read) in {:?}",
req.headers()
.get("x-request-id")
.and_then(|v| v.to_str().ok()),
connect_failures,
self.retry_config.connect_retries,
request_failures,
self.retry_config.retries,
read_failures,
self.retry_config.read_retries,
sleep_time
);
tokio::time::sleep(sleep_time).await;
}
}
pub async fn check_response(&self, request_id: &str, response: Response) -> Result<Response> {
// Try to get the response text, but if that fails, just return the status code
async fn rsp_to_str(response: Response) -> String {
let status = response.status();
if status.is_success() {
response.text().await.unwrap_or_else(|_| status.to_string())
}
pub async fn check_response(&self, response: Response) -> Result<Response> {
let status_int: u16 = u16::from(response.status());
if (400..500).contains(&status_int) {
Err(Error::InvalidInput {
message: Self::rsp_to_str(response).await,
})
} else if status_int != 200 {
Err(Error::Runtime {
message: Self::rsp_to_str(response).await,
})
} else {
Ok(response)
} else {
let response_text = response.text().await.ok();
let message = if let Some(response_text) = response_text {
format!("{}: {}", status, response_text)
} else {
status.to_string()
};
Err(Error::Http {
source: message.into(),
request_id: request_id.into(),
status_code: Some(status),
})
}
}
}
/// Per-request retry bookkeeping: the number of failures of each kind seen so
/// far, the configuration holding the limits they are compared against, and
/// the id of the request being retried.
struct RetryCounter<'a> {
/// Bumped by `increment_request_failures`; compared against `config.retries`.
request_failures: u8,
/// Bumped by `increment_connect_failures`; compared against `config.connect_retries`.
connect_failures: u8,
/// Bumped by `increment_read_failures`; compared against `config.read_retries`.
read_failures: u8,
/// Retry limits and the backoff parameters read by `next_sleep_time`.
config: &'a ResolvedRetryConfig,
/// Id of the request; cloned into `Error::Retry` and included in the retry log line.
request_id: String,
}
impl<'a> RetryCounter<'a> {
    /// Creates a counter for `request_id` with every failure count at zero.
    fn new(config: &'a ResolvedRetryConfig, request_id: String) -> Self {
        Self {
            request_failures: 0,
            connect_failures: 0,
            read_failures: 0,
            config,
            request_id,
        }
    }

    /// Returns `Err(Error::Retry { .. })` as soon as any one of the three
    /// failure counters has reached its configured limit, and `Ok(())`
    /// otherwise.
    ///
    /// `source` is the error that triggered this check and `status_code` is
    /// the HTTP status associated with it, if any; both are embedded in the
    /// returned `Error::Retry`.
    fn check_out_of_retries(
        &self,
        source: Box<dyn std::error::Error + Send + Sync>,
        status_code: Option<reqwest::StatusCode>,
    ) -> Result<()> {
        if self.request_failures >= self.config.retries
            || self.connect_failures >= self.config.connect_retries
            || self.read_failures >= self.config.read_retries
        {
            Err(Error::Retry {
                request_id: self.request_id.clone(),
                request_failures: self.request_failures,
                max_request_failures: self.config.retries,
                connect_failures: self.connect_failures,
                max_connect_failures: self.config.connect_retries,
                read_failures: self.read_failures,
                max_read_failures: self.config.read_retries,
                source,
                status_code,
            })
        } else {
            Ok(())
        }
    }

    /// Records one more failed request and checks the retry budget.
    ///
    /// The status code is taken from `source` when it is an `Error::Http`;
    /// any other error variant contributes no status code.
    fn increment_request_failures(&mut self, source: crate::Error) -> Result<()> {
        self.request_failures += 1;
        let status_code = if let crate::Error::Http { status_code, .. } = &source {
            *status_code
        } else {
            None
        };
        self.check_out_of_retries(Box::new(source), status_code)
    }

    /// Records one more connection failure and checks the retry budget.
    fn increment_connect_failures(&mut self, source: reqwest::Error) -> Result<()> {
        self.connect_failures += 1;
        // Read the status before `source` is moved into the box.
        let status_code = source.status();
        self.check_out_of_retries(Box::new(source), status_code)
    }

    /// Records one more read failure and checks the retry budget.
    fn increment_read_failures(&mut self, source: reqwest::Error) -> Result<()> {
        self.read_failures += 1;
        // Read the status before `source` is moved into the box.
        let status_code = source.status();
        self.check_out_of_retries(Box::new(source), status_code)
    }

    /// Computes how long to wait before the next attempt and logs the current
    /// retry state at debug level.
    ///
    /// The delay is `backoff_factor * 2^request_failures` seconds plus a
    /// random jitter in `[0, backoff_jitter)` seconds. Only the
    /// request-failure count feeds the exponent; connect and read failures do
    /// not lengthen the delay.
    ///
    /// NOTE(review): `Duration::from_secs_f32` panics on negative or
    /// non-finite input, so this assumes `backoff_factor` and
    /// `backoff_jitter` are non-negative and finite — confirm where the
    /// config is resolved.
    fn next_sleep_time(&self) -> Duration {
        let backoff = self.config.backoff_factor * (2.0f32.powi(self.request_failures as i32));
        let jitter = rand::random::<f32>() * self.config.backoff_jitter;
        let sleep_time = Duration::from_secs_f32(backoff + jitter);
        // The middle pair is the request-failure counter; it was previously
        // mislabelled as a second "read" pair.
        debug!(
            "Retrying request {:?} ({}/{} connect, {}/{} request, {}/{} read) in {:?}",
            self.request_id,
            self.connect_failures,
            self.config.connect_retries,
            self.request_failures,
            self.config.retries,
            self.read_failures,
            self.config.read_retries,
            sleep_time
        );
        sleep_time
    }
}
/// Extension trait for turning the error side of a request result into this
/// crate's error type, tagged with the id of the request that produced it.
///
/// The implementation for `reqwest::Result<T>` in this file maps the error to
/// `Error::Http`.
pub trait RequestResultExt {
/// Success value carried through unchanged by the conversion.
type Output;
/// Consumes `self`, converting any error and attaching `request_id` to it.
fn err_to_http(self, request_id: String) -> Result<Self::Output>;
}
impl<T> RequestResultExt for reqwest::Result<T> {
type Output = T;
fn err_to_http(self, request_id: String) -> Result<T> {
self.map_err(|err| {
let status_code = err.status();
Error::Http {
source: Box::new(err),
request_id,
status_code,
}
})
}
}
#[cfg(test)]
pub mod test_utils {
use std::sync::Arc;

View File

@@ -29,7 +29,7 @@ use crate::embeddings::EmbeddingRegistry;
use crate::error::Result;
use crate::Table;
use super::client::{ClientConfig, HttpSend, RequestResultExt, RestfulLanceDbClient, Sender};
use super::client::{ClientConfig, HttpSend, RestfulLanceDbClient, Sender};
use super::table::RemoteTable;
use super::util::batches_to_ipc_bytes;
use super::ARROW_STREAM_CONTENT_TYPE;
@@ -105,13 +105,9 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
if let Some(start_after) = options.start_after {
req = req.query(&[("page_token", start_after)]);
}
let (request_id, rsp) = self.client.send(req, true).await?;
let rsp = self.client.check_response(&request_id, rsp).await?;
let tables = rsp
.json::<ListTablesResponse>()
.await
.err_to_http(request_id)?
.tables;
let rsp = self.client.send(req, true).await?;
let rsp = self.client.check_response(rsp).await?;
let tables = rsp.json::<ListTablesResponse>().await?.tables;
for table in &tables {
self.table_cache.insert(table.clone(), ()).await;
}
@@ -134,11 +130,13 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
.client
.post(&format!("/v1/table/{}/create/", options.name))
.body(data_buffer)
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
let (request_id, rsp) = self.client.send(req, false).await?;
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE)
// This is currently expected by LanceDb cloud but will be removed soon.
.header("x-request-id", "na");
let rsp = self.client.send(req, false).await?;
if rsp.status() == StatusCode::BAD_REQUEST {
let body = rsp.text().await.err_to_http(request_id.clone())?;
let body = rsp.text().await?;
if body.contains("already exists") {
return Err(crate::Error::TableAlreadyExists { name: options.name });
} else {
@@ -146,7 +144,7 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
}
}
self.client.check_response(&request_id, rsp).await?;
self.client.check_response(rsp).await?;
self.table_cache.insert(options.name.clone(), ()).await;
@@ -162,11 +160,11 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
let req = self
.client
.get(&format!("/v1/table/{}/describe/", options.name));
let (request_id, resp) = self.client.send(req, true).await?;
let resp = self.client.send(req, true).await?;
if resp.status() == StatusCode::NOT_FOUND {
return Err(crate::Error::TableNotFound { name: options.name });
}
self.client.check_response(&request_id, resp).await?;
self.client.check_response(resp).await?;
}
Ok(Table::new(Arc::new(RemoteTable::new(
@@ -180,8 +178,8 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
.client
.post(&format!("/v1/table/{}/rename/", current_name));
let req = req.json(&serde_json::json!({ "new_table_name": new_name }));
let (request_id, resp) = self.client.send(req, false).await?;
self.client.check_response(&request_id, resp).await?;
let resp = self.client.send(req, false).await?;
self.client.check_response(resp).await?;
self.table_cache.remove(current_name).await;
self.table_cache.insert(new_name.into(), ()).await;
Ok(())
@@ -189,8 +187,8 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
async fn drop_table(&self, name: &str) -> Result<()> {
let req = self.client.post(&format!("/v1/table/{}/drop/", name));
let (request_id, resp) = self.client.send(req, true).await?;
self.client.check_response(&request_id, resp).await?;
let resp = self.client.send(req, true).await?;
self.client.check_response(resp).await?;
self.table_cache.remove(name).await;
Ok(())
}
@@ -208,57 +206,16 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
#[cfg(test)]
mod tests {
use std::sync::{Arc, OnceLock};
use std::sync::Arc;
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator};
use arrow_schema::{DataType, Field, Schema};
use crate::{
remote::{ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE},
Connection, Error,
Connection,
};
#[tokio::test]
async fn test_retries() {
// We'll record the request_id here, to check it matches the one in the error.
let seen_request_id = Arc::new(OnceLock::new());
let seen_request_id_ref = seen_request_id.clone();
let conn = Connection::new_with_handler(move |request| {
// Request id should be the same on each retry.
let request_id = request.headers()["x-request-id"]
.to_str()
.unwrap()
.to_string();
let seen_id = seen_request_id_ref.get_or_init(|| request_id.clone());
assert_eq!(&request_id, seen_id);
http::Response::builder()
.status(500)
.body("internal server error")
.unwrap()
});
let result = conn.table_names().execute().await;
if let Err(Error::Retry {
request_id,
request_failures,
max_request_failures,
source,
..
}) = result
{
let expected_id = seen_request_id.get().unwrap();
assert_eq!(&request_id, expected_id);
assert_eq!(request_failures, max_request_failures);
assert!(
source.to_string().contains("internal server error"),
"source: {:?}",
source
);
} else {
panic!("unexpected result: {:?}", result);
};
}
#[tokio::test]
async fn test_table_names() {
let conn = Connection::new_with_handler(|request| {

View File

@@ -34,7 +34,6 @@ use crate::{
},
};
use super::client::RequestResultExt;
use super::client::{HttpSend, RestfulLanceDbClient, Sender};
use super::{ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE};
@@ -54,25 +53,15 @@ impl<S: HttpSend> RemoteTable<S> {
let request = self
.client
.post(&format!("/v1/table/{}/describe/", self.name));
let (request_id, response) = self.client.send(request, true).await?;
let response = self.client.send(request, true).await?;
let response = self.check_table_response(&request_id, response).await?;
let response = self.check_table_response(response).await?;
match response.text().await {
Ok(body) => serde_json::from_str(&body).map_err(|e| Error::Http {
source: format!("Failed to parse table description: {}", e).into(),
request_id,
status_code: None,
}),
Err(err) => {
let status_code = err.status();
Err(Error::Http {
source: Box::new(err),
request_id,
status_code,
})
}
}
let body = response.text().await?;
serde_json::from_str(&body).map_err(|e| Error::Http {
message: format!("Failed to parse table description: {}", e),
})
}
fn reader_as_body(data: Box<dyn RecordBatchReader + Send>) -> Result<reqwest::Body> {
@@ -98,23 +87,18 @@ impl<S: HttpSend> RemoteTable<S> {
Ok(reqwest::Body::wrap_stream(body_stream))
}
async fn check_table_response(
&self,
request_id: &str,
response: reqwest::Response,
) -> Result<reqwest::Response> {
async fn check_table_response(&self, response: reqwest::Response) -> Result<reqwest::Response> {
if response.status() == StatusCode::NOT_FOUND {
return Err(Error::TableNotFound {
name: self.name.clone(),
});
}
self.client.check_response(request_id, response).await
self.client.check_response(response).await
}
async fn read_arrow_stream(
&self,
request_id: &str,
body: reqwest::Response,
) -> Result<SendableRecordBatchStream> {
// Assert that the content type is correct
@@ -122,31 +106,24 @@ impl<S: HttpSend> RemoteTable<S> {
.headers()
.get(CONTENT_TYPE)
.ok_or_else(|| Error::Http {
source: "Missing content type".into(),
request_id: request_id.to_string(),
status_code: None,
message: "Missing content type".into(),
})?
.to_str()
.map_err(|e| Error::Http {
source: format!("Failed to parse content type: {}", e).into(),
request_id: request_id.to_string(),
status_code: None,
message: format!("Failed to parse content type: {}", e),
})?;
if content_type != ARROW_STREAM_CONTENT_TYPE {
return Err(Error::Http {
source: format!(
message: format!(
"Expected content type {}, got {}",
ARROW_STREAM_CONTENT_TYPE, content_type
)
.into(),
request_id: request_id.to_string(),
status_code: None,
),
});
}
// There isn't a way to actually stream this data yet. I have an upstream issue:
// https://github.com/apache/arrow-rs/issues/6420
let body = body.bytes().await.err_to_http(request_id.into())?;
let body = body.bytes().await?;
let reader = StreamReader::try_new(body.reader(), None)?;
let schema = reader.schema();
let stream = futures::stream::iter(reader).map_err(DataFusionError::from);
@@ -282,16 +259,14 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
request = request.json(&serde_json::json!({}));
}
let (request_id, response) = self.client.send(request, true).await?;
let response = self.client.send(request, true).await?;
let response = self.check_table_response(&request_id, response).await?;
let response = self.check_table_response(response).await?;
let body = response.text().await.err_to_http(request_id.clone())?;
let body = response.text().await?;
serde_json::from_str(&body).map_err(|e| Error::Http {
source: format!("Failed to parse row count: {}", e).into(),
request_id,
status_code: None,
message: format!("Failed to parse row count: {}", e),
})
}
async fn add(
@@ -313,9 +288,9 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
}
}
let (request_id, response) = self.client.send(request, false).await?;
let response = self.client.send(request, false).await?;
self.check_table_response(&request_id, response).await?;
self.check_table_response(response).await?;
Ok(())
}
@@ -364,9 +339,9 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
let request = request.json(&body);
let (request_id, response) = self.client.send(request, true).await?;
let response = self.client.send(request, true).await?;
let stream = self.read_arrow_stream(&request_id, response).await?;
let stream = self.read_arrow_stream(response).await?;
Ok(Arc::new(OneShotExec::new(stream)))
}
@@ -386,9 +361,9 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
let request = request.json(&body);
let (request_id, response) = self.client.send(request, true).await?;
let response = self.client.send(request, true).await?;
let stream = self.read_arrow_stream(&request_id, response).await?;
let stream = self.read_arrow_stream(response).await?;
Ok(DatasetRecordBatchStream::new(stream))
}
@@ -408,20 +383,17 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
"only_if": update.filter,
}));
let (request_id, response) = self.client.send(request, false).await?;
let response = self.client.send(request, false).await?;
let response = self.check_table_response(&request_id, response).await?;
let response = self.check_table_response(response).await?;
let body = response.text().await.err_to_http(request_id.clone())?;
let body = response.text().await?;
serde_json::from_str(&body).map_err(|e| Error::Http {
source: format!(
message: format!(
"Failed to parse updated rows result from response {}: {}",
body, e
)
.into(),
request_id,
status_code: None,
),
})
}
async fn delete(&self, predicate: &str) -> Result<()> {
@@ -430,8 +402,8 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
.client
.post(&format!("/v1/table/{}/delete/", self.name))
.json(&body);
let (request_id, response) = self.client.send(request, false).await?;
self.check_table_response(&request_id, response).await?;
let response = self.client.send(request, false).await?;
self.check_table_response(response).await?;
Ok(())
}
@@ -502,9 +474,9 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
let request = request.json(&body);
let (request_id, response) = self.client.send(request, false).await?;
let response = self.client.send(request, false).await?;
self.check_table_response(&request_id, response).await?;
self.check_table_response(response).await?;
Ok(())
}
@@ -523,9 +495,9 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE)
.body(body);
let (request_id, response) = self.client.send(request, false).await?;
let response = self.client.send(request, false).await?;
self.check_table_response(&request_id, response).await?;
self.check_table_response(response).await?;
Ok(())
}
@@ -553,79 +525,28 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
message: "drop_columns is not yet supported.".into(),
})
}
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
// Make request to list the indices
let request = self
.client
.post(&format!("/v1/table/{}/index/list/", self.name));
let (request_id, response) = self.client.send(request, true).await?;
let response = self.check_table_response(&request_id, response).await?;
#[derive(Deserialize)]
struct ListIndicesResponse {
indexes: Vec<IndexConfigResponse>,
}
#[derive(Deserialize)]
struct IndexConfigResponse {
index_name: String,
columns: Vec<String>,
}
let body = response.text().await.err_to_http(request_id.clone())?;
let body: ListIndicesResponse = serde_json::from_str(&body).map_err(|err| Error::Http {
source: format!(
"Failed to parse list_indices response: {}, body: {}",
err, body
)
.into(),
request_id,
status_code: None,
})?;
// Make request to get stats for each index, so we get the index type.
// This is a bit inefficient, but it's the only way to get the index type.
let mut futures = Vec::with_capacity(body.indexes.len());
for index in body.indexes {
let future = async move {
match self.index_stats(&index.index_name).await {
Ok(Some(stats)) => Ok(Some(IndexConfig {
name: index.index_name,
index_type: stats.index_type,
columns: index.columns,
})),
Ok(None) => Ok(None), // The index must have been deleted since we listed it.
Err(e) => Err(e),
}
};
futures.push(future);
}
let results = futures::future::try_join_all(futures).await?;
let index_configs = results.into_iter().flatten().collect();
Ok(index_configs)
Err(Error::NotSupported {
message: "list_indices is not yet supported.".into(),
})
}
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
let request = self.client.post(&format!(
"/v1/table/{}/index/{}/stats/",
self.name, index_name
));
let (request_id, response) = self.client.send(request, true).await?;
let response = self.client.send(request, true).await?;
if response.status() == StatusCode::NOT_FOUND {
return Ok(None);
}
let response = self.check_table_response(&request_id, response).await?;
let response = self.check_table_response(response).await?;
let body = response.text().await.err_to_http(request_id.clone())?;
let body = response.text().await?;
let stats = serde_json::from_str(&body).map_err(|e| Error::Http {
source: format!("Failed to parse index statistics: {}", e).into(),
request_id,
status_code: None,
message: format!("Failed to parse index statistics: {}", e),
})?;
Ok(Some(stats))
@@ -1260,69 +1181,6 @@ mod tests {
}
}
#[tokio::test]
async fn test_list_indices() {
    // The mock server answers the index listing plus one stats request per
    // listed index; any other path fails the test.
    let table = Table::new_with_handler("my_table", |request| {
        assert_eq!(request.method(), "POST");

        let path = request.url().path();
        let payload = if path == "/v1/table/my_table/index/list/" {
            serde_json::json!({
                "indexes": [
                    {
                        "index_name": "vector_idx",
                        "index_uuid": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
                        "columns": ["vector"],
                        "index_status": "done",
                    },
                    {
                        "index_name": "my_idx",
                        "index_uuid": "34255f64-5717-4562-b3fc-2c963f66afa6",
                        "columns": ["my_column"],
                        "index_status": "done",
                    },
                ]
            })
        } else if path == "/v1/table/my_table/index/vector_idx/stats/" {
            serde_json::json!({
                "num_indexed_rows": 100000,
                "num_unindexed_rows": 0,
                "index_type": "IVF_PQ",
                "distance_type": "l2"
            })
        } else if path == "/v1/table/my_table/index/my_idx/stats/" {
            serde_json::json!({
                "num_indexed_rows": 100000,
                "num_unindexed_rows": 0,
                "index_type": "LABEL_LIST"
            })
        } else {
            panic!("Unexpected path: {}", path)
        };

        let serialized = serde_json::to_string(&payload).unwrap();
        http::Response::builder()
            .status(200)
            .body(serialized)
            .unwrap()
    });

    let indices = table.list_indices().await.unwrap();

    // Each config combines the name/columns from the listing with the index
    // type reported by that index's stats response.
    assert_eq!(
        indices,
        vec![
            IndexConfig {
                name: "vector_idx".into(),
                index_type: IndexType::IvfPq,
                columns: vec!["vector".into()],
            },
            IndexConfig {
                name: "my_idx".into(),
                index_type: IndexType::LabelList,
                columns: vec!["my_column".into()],
            },
        ]
    );
}
#[tokio::test]
async fn test_index_stats() {
let table = Table::new_with_handler("my_table", |request| {

View File

@@ -2110,6 +2110,7 @@ mod tests {
use arrow_schema::{DataType, Field, Schema, TimeUnit};
use futures::TryStreamExt;
use lance::dataset::{Dataset, WriteMode};
use lance::index::DatasetIndexInternalExt;
use lance::io::{ObjectStoreParams, WrappingObjectStore};
use rand::Rng;
use tempfile::tempdir;
@@ -3001,8 +3002,22 @@ mod tests {
let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
// TODO: Fix via https://github.com/lancedb/lance/issues/2039
// assert_eq!(index.index_type, crate::index::IndexType::Bitmap);
assert_eq!(index.columns, vec!["category".to_string()]);
// For now, just open the index to verify its type
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
let indices = lance_dataset
.load_indices_by_name(&index.name)
.await
.unwrap();
let index_meta = &indices[0];
let idx = lance_dataset
.open_scalar_index("category", &index_meta.uuid.to_string())
.await
.unwrap();
assert_eq!(idx.index_type(), IndexType::Bitmap);
}
#[tokio::test]
@@ -3071,57 +3086,22 @@ mod tests {
let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::LabelList);
// TODO: Fix via https://github.com/lancedb/lance/issues/2039
// assert_eq!(index.index_type, crate::index::IndexType::LabelList);
assert_eq!(index.columns, vec!["tags".to_string()]);
}
#[tokio::test]
async fn test_create_inverted_index() {
let tmp_dir = tempdir().unwrap();
let uri = tmp_dir.path().to_str().unwrap();
let conn = ConnectBuilder::new(uri).execute().await.unwrap();
const WORDS: [&str; 3] = ["cat", "dog", "fish"];
let mut text_builder = StringBuilder::new();
let num_rows = 120;
for i in 0..num_rows {
text_builder.append_value(WORDS[i % 3]);
}
let text = Arc::new(text_builder.finish());
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("text", DataType::Utf8, true),
]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(Int32Array::from_iter_values(0..num_rows as i32)),
text,
],
)
.unwrap();
let table = conn
.create_table(
"test_bitmap",
RecordBatchIterator::new(vec![Ok(batch.clone())], batch.schema()),
)
.execute()
// For now, just open the index to verify its type
let lance_dataset = table.as_native().unwrap().dataset.get().await.unwrap();
let indices = lance_dataset
.load_indices_by_name(&index.name)
.await
.unwrap();
table
.create_index(&["text"], Index::FTS(Default::default()))
.execute()
let index_meta = &indices[0];
let idx = lance_dataset
.open_scalar_index("tags", &index_meta.uuid.to_string())
.await
.unwrap();
let index_configs = table.list_indices().await.unwrap();
assert_eq!(index_configs.len(), 1);
let index = index_configs.into_iter().next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::FTS);
assert_eq!(index.columns, vec!["text".to_string()]);
assert_eq!(index.name, "text_idx");
assert_eq!(idx.index_type(), IndexType::LabelList);
}
#[tokio::test]

View File

@@ -54,7 +54,9 @@ impl DatasetRef {
last_consistency_check,
..
} => {
dataset.checkout_latest().await?;
*dataset = dataset
.checkout_version(dataset.latest_version_id().await?)
.await?;
last_consistency_check.replace(Instant::now());
}
Self::TimeTravel { dataset, version } => {