diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index af2a4619..bb94e763 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -106,6 +106,18 @@ jobs: python ci/mock_openai.py & cd nodejs/examples npm test + - name: Check docs + run: | + # We run this as part of the job because the binary needs to be built + # first to export the types of the native code. + set -e + npm ci + npm run docs + if ! git diff --exit-code; then + echo "Docs need to be updated" + echo "Run 'npm run docs', fix any warnings, and commit the changes." + exit 1 + fi macos: timeout-minutes: 30 runs-on: "macos-14" diff --git a/Cargo.lock b/Cargo.lock index b566fd18..7b5882b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4089,7 +4089,7 @@ dependencies = [ [[package]] name = "lancedb" -version = "0.15.1-beta.1" +version = "0.15.1-beta.2" dependencies = [ "arrow", "arrow-array", @@ -4169,7 +4169,7 @@ dependencies = [ [[package]] name = "lancedb-node" -version = "0.15.1-beta.1" +version = "0.15.1-beta.2" dependencies = [ "arrow-array", "arrow-ipc", @@ -4194,7 +4194,7 @@ dependencies = [ [[package]] name = "lancedb-nodejs" -version = "0.15.1-beta.1" +version = "0.15.1-beta.2" dependencies = [ "arrow-array", "arrow-ipc", diff --git a/docs/src/js/README.md b/docs/src/js/README.md index fc2b81a5..2728d523 100644 --- a/docs/src/js/README.md +++ b/docs/src/js/README.md @@ -36,7 +36,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray(); console.log(results); ``` -The [quickstart](../basic.md) contains a more complete example. +The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example. ## Development diff --git a/docs/src/js/classes/Connection.md b/docs/src/js/classes/Connection.md index 88eb5dc4..a21dffba 100644 --- a/docs/src/js/classes/Connection.md +++ b/docs/src/js/classes/Connection.md @@ -23,18 +23,6 @@ be closed when they are garbage collected. 
Any created tables are independent and will continue to work even if the underlying connection has been closed. -## Constructors - -### new Connection() - -```ts -new Connection(): Connection -``` - -#### Returns - -[`Connection`](Connection.md) - ## Methods ### close() @@ -71,7 +59,7 @@ Creates a new empty Table * **name**: `string` The name of the table. -* **schema**: `SchemaLike` +* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md) The schema of the table * **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)> @@ -117,7 +105,7 @@ Creates a new Table and initialize it with new data. * **name**: `string` The name of the table. -* **data**: `TableLike` \| `Record`<`string`, `unknown`>[] +* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`<`string`, `unknown`>[] Non-empty Array of Records to be inserted into the table @@ -189,7 +177,7 @@ Open a table in the database. * **name**: `string` The name of the table -* **options?**: `Partial`<`OpenTableOptions`> +* **options?**: `Partial`<[`OpenTableOptions`](../interfaces/OpenTableOptions.md)> #### Returns diff --git a/docs/src/js/classes/Index.md b/docs/src/js/classes/Index.md index c6175328..1b73766b 100644 --- a/docs/src/js/classes/Index.md +++ b/docs/src/js/classes/Index.md @@ -72,11 +72,9 @@ The results of a full text search are ordered by relevance measured by BM25. You can combine filters with full text search. -For now, the full text search index only supports English, and doesn't support phrase search. - #### Parameters -* **options?**: `Partial`<`FtsOptions`> +* **options?**: `Partial`<[`FtsOptions`](../interfaces/FtsOptions.md)> #### Returns @@ -98,7 +96,7 @@ the vectors. #### Parameters -* **options?**: `Partial`<`HnswPqOptions`> +* **options?**: `Partial`<[`HnswPqOptions`](../interfaces/HnswPqOptions.md)> #### Returns @@ -120,7 +118,7 @@ the vectors. 
#### Parameters -* **options?**: `Partial`<`HnswSqOptions`> +* **options?**: `Partial`<[`HnswSqOptions`](../interfaces/HnswSqOptions.md)> #### Returns diff --git a/docs/src/js/classes/MergeInsertBuilder.md b/docs/src/js/classes/MergeInsertBuilder.md new file mode 100644 index 00000000..d72ea2ea --- /dev/null +++ b/docs/src/js/classes/MergeInsertBuilder.md @@ -0,0 +1,126 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / MergeInsertBuilder + +# Class: MergeInsertBuilder + +A builder used to create and run a merge insert operation + +## Constructors + +### new MergeInsertBuilder() + +```ts +new MergeInsertBuilder(native, schema): MergeInsertBuilder +``` + +Construct a MergeInsertBuilder. __Internal use only.__ + +#### Parameters + +* **native**: `NativeMergeInsertBuilder` + +* **schema**: `Schema`<`any`> \| `Promise`<`Schema`<`any`>> + +#### Returns + +[`MergeInsertBuilder`](MergeInsertBuilder.md) + +## Methods + +### execute() + +```ts +execute(data): Promise +``` + +Executes the merge insert operation + +Nothing is returned but the `Table` is updated + +#### Parameters + +* **data**: [`Data`](../type-aliases/Data.md) + +#### Returns + +`Promise`<`void`> + +*** + +### whenMatchedUpdateAll() + +```ts +whenMatchedUpdateAll(options?): MergeInsertBuilder +``` + +Rows that exist in both the source table (new data) and +the target table (old data) will be updated, replacing +the old row with the corresponding matching row. + +If there are multiple matches then the behavior is undefined. +Currently this causes multiple copies of the row to be created +but that behavior is subject to change. + +An optional condition may be specified. If it is, then only +matched rows that satisfy the condition will be updated. Any +rows that do not satisfy the condition will be left as they +are. Failing to satisfy the condition does not cause a +"matched row" to become a "not matched" row. + +The condition should be an SQL string. 
Use the prefix +target. to refer to rows in the target table (old data) +and the prefix source. to refer to rows in the source +table (new data). + +For example, "target.last_update < source.last_update" + +#### Parameters + +* **options?** + +* **options.where?**: `string` + +#### Returns + +[`MergeInsertBuilder`](MergeInsertBuilder.md) + +*** + +### whenNotMatchedBySourceDelete() + +```ts +whenNotMatchedBySourceDelete(options?): MergeInsertBuilder +``` + +Rows that exist only in the target table (old data) will be +deleted. An optional condition can be provided to limit what +data is deleted. + +#### Parameters + +* **options?** + +* **options.where?**: `string` + An optional condition to limit what data is deleted + +#### Returns + +[`MergeInsertBuilder`](MergeInsertBuilder.md) + +*** + +### whenNotMatchedInsertAll() + +```ts +whenNotMatchedInsertAll(): MergeInsertBuilder +``` + +Rows that exist only in the source table (new data) should +be inserted into the target table. + +#### Returns + +[`MergeInsertBuilder`](MergeInsertBuilder.md) diff --git a/docs/src/js/classes/Query.md b/docs/src/js/classes/Query.md index 2021c5dd..34ff5dae 100644 --- a/docs/src/js/classes/Query.md +++ b/docs/src/js/classes/Query.md @@ -8,30 +8,14 @@ A builder for LanceDB queries. 
+## See + +[Table#query](Table.md#query), [Table#search](Table.md#search) + ## Extends - [`QueryBase`](QueryBase.md)<`NativeQuery`> -## Constructors - -### new Query() - -```ts -new Query(tbl): Query -``` - -#### Parameters - -* **tbl**: `Table` - -#### Returns - -[`Query`](Query.md) - -#### Overrides - -[`QueryBase`](QueryBase.md).[`constructor`](QueryBase.md#constructors) - ## Properties ### inner @@ -46,42 +30,6 @@ protected inner: Query | Promise; ## Methods -### \[asyncIterator\]() - -```ts -asyncIterator: AsyncIterator, any, undefined> -``` - -#### Returns - -`AsyncIterator`<`RecordBatch`<`any`>, `any`, `undefined`> - -#### Inherited from - -[`QueryBase`](QueryBase.md).[`[asyncIterator]`](QueryBase.md#%5Basynciterator%5D) - -*** - -### doCall() - -```ts -protected doCall(fn): void -``` - -#### Parameters - -* **fn** - -#### Returns - -`void` - -#### Inherited from - -[`QueryBase`](QueryBase.md).[`doCall`](QueryBase.md#docall) - -*** - ### execute() ```ts @@ -92,7 +40,7 @@ Execute the query and return the results as an #### Parameters -* **options?**: `Partial`<`QueryExecutionOptions`> +* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)> #### Returns @@ -161,7 +109,7 @@ fastSearch(): this Skip searching un-indexed data. This can make search faster, but will miss any data that is not yet indexed. -Use lancedb.Table#optimize to index all un-indexed data. +Use [Table#optimize](Table.md#optimize) to index all un-indexed data. #### Returns @@ -189,7 +137,7 @@ A filter statement to be applied to this query. `this` -#### Alias +#### See where @@ -213,7 +161,7 @@ fullTextSearch(query, options?): this * **query**: `string` -* **options?**: `Partial`<`FullTextSearchOptions`> +* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)> #### Returns @@ -250,26 +198,6 @@ called then every valid row from the table will be returned. 
*** -### nativeExecute() - -```ts -protected nativeExecute(options?): Promise -``` - -#### Parameters - -* **options?**: `Partial`<`QueryExecutionOptions`> - -#### Returns - -`Promise`<`RecordBatchIterator`> - -#### Inherited from - -[`QueryBase`](QueryBase.md).[`nativeExecute`](QueryBase.md#nativeexecute) - -*** - ### nearestTo() ```ts @@ -294,7 +222,7 @@ If there is more than one vector column you must use #### Parameters -* **vector**: `IntoVector` +* **vector**: [`IntoVector`](../type-aliases/IntoVector.md) #### Returns @@ -427,7 +355,7 @@ Collect the results as an array of objects. #### Parameters -* **options?**: `Partial`<`QueryExecutionOptions`> +* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)> #### Returns @@ -449,7 +377,7 @@ Collect the results as an Arrow #### Parameters -* **options?**: `Partial`<`QueryExecutionOptions`> +* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)> #### Returns diff --git a/docs/src/js/classes/QueryBase.md b/docs/src/js/classes/QueryBase.md index 1b734951..010e85d7 100644 --- a/docs/src/js/classes/QueryBase.md +++ b/docs/src/js/classes/QueryBase.md @@ -8,6 +8,11 @@ Common methods supported by all query types +## See + + - [Query](Query.md) + - [VectorQuery](VectorQuery.md) + ## Extended by - [`Query`](Query.md) @@ -21,22 +26,6 @@ Common methods supported by all query types - `AsyncIterable`<`RecordBatch`> -## Constructors - -### new QueryBase() - -```ts -protected new QueryBase(inner): QueryBase -``` - -#### Parameters - -* **inner**: `NativeQueryType` \| `Promise`<`NativeQueryType`> - -#### Returns - -[`QueryBase`](QueryBase.md)<`NativeQueryType`> - ## Properties ### inner @@ -47,38 +36,6 @@ protected inner: NativeQueryType | Promise; ## Methods -### \[asyncIterator\]() - -```ts -asyncIterator: AsyncIterator, any, undefined> -``` - -#### Returns - -`AsyncIterator`<`RecordBatch`<`any`>, `any`, `undefined`> - -#### Implementation of - 
-`AsyncIterable.[asyncIterator]` - -*** - -### doCall() - -```ts -protected doCall(fn): void -``` - -#### Parameters - -* **fn** - -#### Returns - -`void` - -*** - ### execute() ```ts @@ -89,7 +46,7 @@ Execute the query and return the results as an #### Parameters -* **options?**: `Partial`<`QueryExecutionOptions`> +* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)> #### Returns @@ -150,7 +107,7 @@ fastSearch(): this Skip searching un-indexed data. This can make search faster, but will miss any data that is not yet indexed. -Use lancedb.Table#optimize to index all un-indexed data. +Use [Table#optimize](Table.md#optimize) to index all un-indexed data. #### Returns @@ -174,7 +131,7 @@ A filter statement to be applied to this query. `this` -#### Alias +#### See where @@ -194,7 +151,7 @@ fullTextSearch(query, options?): this * **query**: `string` -* **options?**: `Partial`<`FullTextSearchOptions`> +* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)> #### Returns @@ -223,22 +180,6 @@ called then every valid row from the table will be returned. *** -### nativeExecute() - -```ts -protected nativeExecute(options?): Promise -``` - -#### Parameters - -* **options?**: `Partial`<`QueryExecutionOptions`> - -#### Returns - -`Promise`<`RecordBatchIterator`> - -*** - ### offset() ```ts @@ -314,7 +255,7 @@ Collect the results as an array of objects. 
#### Parameters -* **options?**: `Partial`<`QueryExecutionOptions`> +* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)> #### Returns @@ -332,7 +273,7 @@ Collect the results as an Arrow #### Parameters -* **options?**: `Partial`<`QueryExecutionOptions`> +* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)> #### Returns diff --git a/docs/src/js/classes/Table.md b/docs/src/js/classes/Table.md index 370a941c..aed26c4e 100644 --- a/docs/src/js/classes/Table.md +++ b/docs/src/js/classes/Table.md @@ -14,21 +14,13 @@ will be freed when the Table is garbage collected. To eagerly free the cache yo can call the `close` method. Once the Table is closed, it cannot be used for any further operations. +Tables are created using the methods [Connection#createTable](Connection.md#createtable) +and [Connection#createEmptyTable](Connection.md#createemptytable). Existing tables are opened +using [Connection#openTable](Connection.md#opentable). + Closing a table is optional. It not closed, it will be closed when it is garbage collected. -## Constructors - -### new Table() - -```ts -new Table(): Table -``` - -#### Returns - -[`Table`](Table.md) - ## Accessors ### name @@ -216,6 +208,9 @@ Indices on vector columns will speed up vector searches. Indices on scalar columns will speed up filtering (in both vector and non-vector searches) +We currently don't support custom named indexes. +The index name will always be `${column}_idx`. + #### Parameters * **column**: `string` @@ -226,11 +221,6 @@ vector and non-vector searches) `Promise`<`void`> -#### Note - -We currently don't support custom named indexes, -The index name will always be `${column}_idx` - #### Examples ```ts @@ -329,18 +319,14 @@ Drop an index from the table. * **name**: `string` The name of the index. + This does not delete the index from disk, it just removes it from the table. 
+ To delete the index, run [Table#optimize](Table.md#optimize) after dropping the index. + Use [Table.listIndices](Table.md#listindices) to find the names of the indices. #### Returns `Promise`<`void`> -#### Note - -This does not delete the index from disk, it just removes it from the table. -To delete the index, run [Table#optimize](Table.md#optimize) after dropping the index. - -Use [Table.listIndices](Table.md#listindices) to find the names of the indices. - *** ### indexStats() @@ -404,7 +390,7 @@ List all the versions of the table #### Returns -`Promise`<`Version`[]> +`Promise`<[`Version`](../interfaces/Version.md)[]> *** @@ -420,7 +406,7 @@ abstract mergeInsert(on): MergeInsertBuilder #### Returns -`MergeInsertBuilder` +[`MergeInsertBuilder`](MergeInsertBuilder.md) *** @@ -464,7 +450,7 @@ Modeled after ``VACUUM`` in PostgreSQL. #### Returns -`Promise`<`OptimizeStats`> +`Promise`<[`OptimizeStats`](../interfaces/OptimizeStats.md)> *** @@ -581,7 +567,7 @@ Get the schema of the table. abstract search( query, queryType?, - ftsColumns?): VectorQuery | Query + ftsColumns?): Query | VectorQuery ``` Create a search query to find the nearest neighbors @@ -589,7 +575,7 @@ of the given query #### Parameters -* **query**: `string` \| `IntoVector` +* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) the query, a vector or string * **queryType?**: `string` @@ -603,7 +589,7 @@ of the given query #### Returns -[`VectorQuery`](VectorQuery.md) \| [`Query`](Query.md) +[`Query`](Query.md) \| [`VectorQuery`](VectorQuery.md) *** @@ -722,7 +708,7 @@ by `query`. 
#### Parameters -* **vector**: `IntoVector` +* **vector**: [`IntoVector`](../type-aliases/IntoVector.md) #### Returns @@ -745,38 +731,3 @@ Retrieve the version of the table #### Returns `Promise`<`number`> - -*** - -### parseTableData() - -```ts -static parseTableData( - data, - options?, - streaming?): Promise -``` - -#### Parameters - -* **data**: `TableLike` \| `Record`<`string`, `unknown`>[] - -* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)> - -* **streaming?**: `boolean` = `false` - -#### Returns - -`Promise`<`object`> - -##### buf - -```ts -buf: Buffer; -``` - -##### mode - -```ts -mode: string; -``` diff --git a/docs/src/js/classes/VectorQuery.md b/docs/src/js/classes/VectorQuery.md index b970ea7e..943288d7 100644 --- a/docs/src/js/classes/VectorQuery.md +++ b/docs/src/js/classes/VectorQuery.md @@ -10,30 +10,14 @@ A builder used to construct a vector search This builder can be reused to execute the query many times. +## See + +[Query#nearestTo](Query.md#nearestto) + ## Extends - [`QueryBase`](QueryBase.md)<`NativeVectorQuery`> -## Constructors - -### new VectorQuery() - -```ts -new VectorQuery(inner): VectorQuery -``` - -#### Parameters - -* **inner**: `VectorQuery` \| `Promise`<`VectorQuery`> - -#### Returns - -[`VectorQuery`](VectorQuery.md) - -#### Overrides - -[`QueryBase`](QueryBase.md).[`constructor`](QueryBase.md#constructors) - ## Properties ### inner @@ -48,22 +32,6 @@ protected inner: VectorQuery | Promise; ## Methods -### \[asyncIterator\]() - -```ts -asyncIterator: AsyncIterator, any, undefined> -``` - -#### Returns - -`AsyncIterator`<`RecordBatch`<`any`>, `any`, `undefined`> - -#### Inherited from - -[`QueryBase`](QueryBase.md).[`[asyncIterator]`](QueryBase.md#%5Basynciterator%5D) - -*** - ### addQueryVector() ```ts @@ -72,7 +40,7 @@ addQueryVector(vector): VectorQuery #### Parameters -* **vector**: `IntoVector` +* **vector**: [`IntoVector`](../type-aliases/IntoVector.md) #### Returns @@ -179,26 +147,6 @@ 
By default "l2" is used. *** -### doCall() - -```ts -protected doCall(fn): void -``` - -#### Parameters - -* **fn** - -#### Returns - -`void` - -#### Inherited from - -[`QueryBase`](QueryBase.md).[`doCall`](QueryBase.md#docall) - -*** - ### ef() ```ts @@ -233,7 +181,7 @@ Execute the query and return the results as an #### Parameters -* **options?**: `Partial`<`QueryExecutionOptions`> +* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)> #### Returns @@ -302,7 +250,7 @@ fastSearch(): this Skip searching un-indexed data. This can make search faster, but will miss any data that is not yet indexed. -Use lancedb.Table#optimize to index all un-indexed data. +Use [Table#optimize](Table.md#optimize) to index all un-indexed data. #### Returns @@ -330,7 +278,7 @@ A filter statement to be applied to this query. `this` -#### Alias +#### See where @@ -354,7 +302,7 @@ fullTextSearch(query, options?): this * **query**: `string` -* **options?**: `Partial`<`FullTextSearchOptions`> +* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)> #### Returns @@ -391,26 +339,6 @@ called then every valid row from the table will be returned. *** -### nativeExecute() - -```ts -protected nativeExecute(options?): Promise -``` - -#### Parameters - -* **options?**: `Partial`<`QueryExecutionOptions`> - -#### Returns - -`Promise`<`RecordBatchIterator`> - -#### Inherited from - -[`QueryBase`](QueryBase.md).[`nativeExecute`](QueryBase.md#nativeexecute) - -*** - ### nprobes() ```ts @@ -625,7 +553,7 @@ Collect the results as an array of objects. 
#### Parameters -* **options?**: `Partial`<`QueryExecutionOptions`> +* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)> #### Returns @@ -647,7 +575,7 @@ Collect the results as an Arrow #### Parameters -* **options?**: `Partial`<`QueryExecutionOptions`> +* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)> #### Returns diff --git a/docs/src/js/enumerations/WriteMode.md b/docs/src/js/enumerations/WriteMode.md deleted file mode 100644 index 2b00f3b2..00000000 --- a/docs/src/js/enumerations/WriteMode.md +++ /dev/null @@ -1,33 +0,0 @@ -[**@lancedb/lancedb**](../README.md) • **Docs** - -*** - -[@lancedb/lancedb](../globals.md) / WriteMode - -# Enumeration: WriteMode - -Write mode for writing a table. - -## Enumeration Members - -### Append - -```ts -Append: "Append"; -``` - -*** - -### Create - -```ts -Create: "Create"; -``` - -*** - -### Overwrite - -```ts -Overwrite: "Overwrite"; -``` diff --git a/docs/src/js/functions/connect.md b/docs/src/js/functions/connect.md index fe8b15e0..054928c0 100644 --- a/docs/src/js/functions/connect.md +++ b/docs/src/js/functions/connect.md @@ -6,10 +6,10 @@ # Function: connect() -## connect(uri, opts) +## connect(uri, options) ```ts -function connect(uri, opts?): Promise +function connect(uri, options?): Promise ``` Connect to a LanceDB instance at the given URI. @@ -26,7 +26,8 @@ Accepted formats: The uri of the database. If the database uri starts with `db://` then it connects to a remote database. -* **opts?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)> +* **options?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)> + The options to use when connecting to the database ### Returns @@ -49,10 +50,10 @@ const conn = await connect( }); ``` -## connect(opts) +## connect(options) ```ts -function connect(opts): Promise +function connect(options): Promise ``` Connect to a LanceDB instance at the given URI. 
@@ -65,7 +66,8 @@ Accepted formats: ### Parameters -* **opts**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)> & `object` +* **options**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)> & `object` + The options to use when connecting to the database ### Returns diff --git a/docs/src/js/functions/makeArrowTable.md b/docs/src/js/functions/makeArrowTable.md index a295f2c3..e8916612 100644 --- a/docs/src/js/functions/makeArrowTable.md +++ b/docs/src/js/functions/makeArrowTable.md @@ -57,6 +57,7 @@ rules are as follows: ## Example +```ts import { fromTableToBuffer, makeArrowTable } from "../arrow"; import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow"; @@ -78,42 +79,40 @@ The `vectorColumns` option can be used to support other vector column names and data types. ```ts - const schema = new Schema([ - new Field("a", new Float64()), - new Field("b", new Float64()), - new Field( - "vector", - new FixedSizeList(3, new Field("item", new Float32())) - ), - ]); - const table = makeArrowTable([ - { a: 1, b: 2, vector: [1, 2, 3] }, - { a: 4, b: 5, vector: [4, 5, 6] }, - { a: 7, b: 8, vector: [7, 8, 9] }, - ]); - assert.deepEqual(table.schema, schema); + new Field("a", new Float64()), + new Field("b", new Float64()), + new Field( + "vector", + new FixedSizeList(3, new Field("item", new Float32())) + ), +]); +const table = makeArrowTable([ + { a: 1, b: 2, vector: [1, 2, 3] }, + { a: 4, b: 5, vector: [4, 5, 6] }, + { a: 7, b: 8, vector: [7, 8, 9] }, +]); +assert.deepEqual(table.schema, schema); ``` You can specify the vector column types and names using the options as well -```typescript - +```ts const schema = new Schema([ - new Field('a', new Float64()), - new Field('b', new Float64()), - new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))), - new Field('vec2', new FixedSizeList(3, new Field('item', new Float16()))) - ]); + new Field('a', new Float64()), + new Field('b', new Float64()), 
+ new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))), + new Field('vec2', new FixedSizeList(3, new Field('item', new Float16()))) +]); const table = makeArrowTable([ - { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] }, - { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] }, - { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] } - ], { - vectorColumns: { - vec1: { type: new Float16() }, - vec2: { type: new Float16() } - } - } + { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] }, + { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] }, + { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] } +], { + vectorColumns: { + vec1: { type: new Float16() }, + vec2: { type: new Float16() } + } +} assert.deepEqual(table.schema, schema) ``` diff --git a/docs/src/js/globals.md b/docs/src/js/globals.md index 7e4758fd..13810a5c 100644 --- a/docs/src/js/globals.md +++ b/docs/src/js/globals.md @@ -9,15 +9,12 @@ - [embedding](namespaces/embedding/README.md) - [rerankers](namespaces/rerankers/README.md) -## Enumerations - -- [WriteMode](enumerations/WriteMode.md) - ## Classes - [Connection](classes/Connection.md) - [Index](classes/Index.md) - [MakeArrowTableOptions](classes/MakeArrowTableOptions.md) +- [MergeInsertBuilder](classes/MergeInsertBuilder.md) - [Query](classes/Query.md) - [QueryBase](classes/QueryBase.md) - [RecordBatchIterator](classes/RecordBatchIterator.md) @@ -31,23 +28,39 @@ - [AddDataOptions](interfaces/AddDataOptions.md) - [ClientConfig](interfaces/ClientConfig.md) - [ColumnAlteration](interfaces/ColumnAlteration.md) +- [CompactionStats](interfaces/CompactionStats.md) - [ConnectionOptions](interfaces/ConnectionOptions.md) - [CreateTableOptions](interfaces/CreateTableOptions.md) - [ExecutableQuery](interfaces/ExecutableQuery.md) +- [FtsOptions](interfaces/FtsOptions.md) +- [FullTextSearchOptions](interfaces/FullTextSearchOptions.md) +- [HnswPqOptions](interfaces/HnswPqOptions.md) +- [HnswSqOptions](interfaces/HnswSqOptions.md) - 
[IndexConfig](interfaces/IndexConfig.md) - [IndexOptions](interfaces/IndexOptions.md) - [IndexStatistics](interfaces/IndexStatistics.md) - [IvfPqOptions](interfaces/IvfPqOptions.md) +- [OpenTableOptions](interfaces/OpenTableOptions.md) - [OptimizeOptions](interfaces/OptimizeOptions.md) +- [OptimizeStats](interfaces/OptimizeStats.md) +- [QueryExecutionOptions](interfaces/QueryExecutionOptions.md) +- [RemovalStats](interfaces/RemovalStats.md) - [RetryConfig](interfaces/RetryConfig.md) - [TableNamesOptions](interfaces/TableNamesOptions.md) - [TimeoutConfig](interfaces/TimeoutConfig.md) - [UpdateOptions](interfaces/UpdateOptions.md) -- [WriteOptions](interfaces/WriteOptions.md) +- [Version](interfaces/Version.md) ## Type Aliases - [Data](type-aliases/Data.md) +- [DataLike](type-aliases/DataLike.md) +- [FieldLike](type-aliases/FieldLike.md) +- [IntoSql](type-aliases/IntoSql.md) +- [IntoVector](type-aliases/IntoVector.md) +- [RecordBatchLike](type-aliases/RecordBatchLike.md) +- [SchemaLike](type-aliases/SchemaLike.md) +- [TableLike](type-aliases/TableLike.md) ## Functions diff --git a/docs/src/js/interfaces/CompactionStats.md b/docs/src/js/interfaces/CompactionStats.md new file mode 100644 index 00000000..4c128f46 --- /dev/null +++ b/docs/src/js/interfaces/CompactionStats.md @@ -0,0 +1,49 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / CompactionStats + +# Interface: CompactionStats + +Statistics about a compaction operation. 
+ +## Properties + +### filesAdded + +```ts +filesAdded: number; +``` + +The number of new, compacted data files added + +*** + +### filesRemoved + +```ts +filesRemoved: number; +``` + +The number of data files removed + +*** + +### fragmentsAdded + +```ts +fragmentsAdded: number; +``` + +The number of new, compacted fragments added + +*** + +### fragmentsRemoved + +```ts +fragmentsRemoved: number; +``` + +The number of fragments removed diff --git a/docs/src/js/interfaces/FtsOptions.md b/docs/src/js/interfaces/FtsOptions.md new file mode 100644 index 00000000..af774cb1 --- /dev/null +++ b/docs/src/js/interfaces/FtsOptions.md @@ -0,0 +1,103 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / FtsOptions + +# Interface: FtsOptions + +Options to create a full text search index + +## Properties + +### asciiFolding? + +```ts +optional asciiFolding: boolean; +``` + +whether to apply ASCII folding (convert accented and other non-ASCII characters to their closest ASCII equivalents) + +*** + +### baseTokenizer? + +```ts +optional baseTokenizer: "raw" | "simple" | "whitespace"; +``` + +The tokenizer to use when building the index. +The default is "simple". + +The following tokenizers are available: + +"simple" - Simple tokenizer. This tokenizer splits the text into tokens using whitespace and punctuation as a delimiter. + +"whitespace" - Whitespace tokenizer. This tokenizer splits the text into tokens using whitespace as a delimiter. + +"raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token. + +*** + +### language? + +```ts +optional language: string; +``` + +language for stemming and stop words +this is only used when `stem` or `removeStopWords` is true + +*** + +### lowercase? + +```ts +optional lowercase: boolean; +``` + +whether to lowercase tokens + +*** + +### maxTokenLength? + +```ts +optional maxTokenLength: number; +``` + +maximum token length +tokens longer than this length will be ignored + +*** + +### removeStopWords? 
+ +```ts +optional removeStopWords: boolean; +``` + +whether to remove stop words + +*** + +### stem? + +```ts +optional stem: boolean; +``` + +whether to stem tokens + +*** + +### withPosition? + +```ts +optional withPosition: boolean; +``` + +Whether to build the index with positions. +True by default. +If set to false, the index will not store the positions of the tokens in the text, +which will make the index smaller and faster to build, but will not support phrase queries. diff --git a/docs/src/js/interfaces/FullTextSearchOptions.md b/docs/src/js/interfaces/FullTextSearchOptions.md new file mode 100644 index 00000000..311f56a4 --- /dev/null +++ b/docs/src/js/interfaces/FullTextSearchOptions.md @@ -0,0 +1,22 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / FullTextSearchOptions + +# Interface: FullTextSearchOptions + +Options that control the behavior of a full text search + +## Properties + +### columns? + +```ts +optional columns: string | string[]; +``` + +The columns to search + +If not specified, all indexed columns will be searched. +For now, only one column can be searched. diff --git a/docs/src/js/interfaces/HnswPqOptions.md b/docs/src/js/interfaces/HnswPqOptions.md new file mode 100644 index 00000000..4dde12b1 --- /dev/null +++ b/docs/src/js/interfaces/HnswPqOptions.md @@ -0,0 +1,149 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / HnswPqOptions + +# Interface: HnswPqOptions + +Options to create an `HNSW_PQ` index + +## Properties + +### distanceType? + +```ts +optional distanceType: "l2" | "cosine" | "dot"; +``` + +The distance metric used to train the index. + +Default value is "l2". + +The following distance types are available: + +"l2" - Euclidean distance. This is a very common distance metric that +accounts for both magnitude and direction when determining the distance +between vectors. L2 distance has a range of [0, ∞). 
+ +"cosine" - Cosine distance. Cosine distance is a distance metric +calculated from the cosine similarity between two vectors. Cosine +similarity is a measure of similarity between two non-zero vectors of an +inner product space. It is defined to equal the cosine of the angle +between them. Unlike L2, the cosine distance is not affected by the +magnitude of the vectors. Cosine distance has a range of [0, 2]. + +"dot" - Dot product. Dot distance is the dot product of two vectors. Dot +distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their +L2 norm is 1), then dot distance is equivalent to the cosine distance. + +*** + +### efConstruction? + +```ts +optional efConstruction: number; +``` + +The number of candidates to evaluate during the construction of the HNSW graph. + +The default value is 300. + +This value controls the tradeoff between build speed and accuracy. +The higher the value the more accurate the build but the slower it will be. +150 to 300 is the typical range. 100 is a minimum for good quality search +results. In most cases, there is no benefit to setting this higher than 500. +This value should be set to a value that is not less than `ef` in the search phase. + +*** + +### m? + +```ts +optional m: number; +``` + +The number of neighbors to select for each vector in the HNSW graph. + +The default value is 20. + +This value controls the tradeoff between search speed and accuracy. +The higher the value the more accurate the search but the slower it will be. + +*** + +### maxIterations? + +```ts +optional maxIterations: number; +``` + +Max iterations to train kmeans. + +The default value is 50. + +When training an IVF index we use kmeans to calculate the partitions. This parameter +controls how many iterations of kmeans to run. + +Increasing this might improve the quality of the index but in most cases the parameter +is unused because kmeans will converge with fewer iterations. 
The parameter is only +used in cases where kmeans does not appear to converge. In those cases it is unlikely +that setting this larger will lead to the index converging anyways. + +*** + +### numPartitions? + +```ts +optional numPartitions: number; +``` + +The number of IVF partitions to create. + +For HNSW, we recommend a small number of partitions. Setting this to 1 works +well for most tables. For very large tables, training just one HNSW graph +will require too much memory. Each partition becomes its own HNSW graph, so +setting this value higher reduces the peak memory use of training. + +*** + +### numSubVectors? + +```ts +optional numSubVectors: number; +``` + +Number of sub-vectors of PQ. + +This value controls how much the vector is compressed during the quantization step. +The more sub vectors there are the less the vector is compressed. The default is +the dimension of the vector divided by 16. If the dimension is not evenly divisible +by 16 we use the dimension divided by 8. + +The above two cases are highly preferred. Having 8 or 16 values per subvector allows +us to use efficient SIMD instructions. + +If the dimension is not divisible by 8 then we use 1 subvector. This is not ideal and +will likely result in poor performance. + +*** + +### sampleRate? + +```ts +optional sampleRate: number; +``` + +The rate used to calculate the number of training vectors for kmeans. + +Default value is 256. + +When an IVF index is trained, we need to calculate partitions. These are groups +of vectors that are similar to each other. To do this we use an algorithm called kmeans. + +Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a +random sample of the data. This parameter controls the size of the sample. The total +number of vectors used to train the index is `sample_rate * num_partitions`. + +Increasing this value might improve the quality of the index but in most cases the +default should be sufficient.
diff --git a/docs/src/js/interfaces/HnswSqOptions.md b/docs/src/js/interfaces/HnswSqOptions.md new file mode 100644 index 00000000..e365388c --- /dev/null +++ b/docs/src/js/interfaces/HnswSqOptions.md @@ -0,0 +1,128 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / HnswSqOptions + +# Interface: HnswSqOptions + +Options to create an `HNSW_SQ` index + +## Properties + +### distanceType? + +```ts +optional distanceType: "l2" | "cosine" | "dot"; +``` + +The distance metric used to train the index. + +Default value is "l2". + +The following distance types are available: + +"l2" - Euclidean distance. This is a very common distance metric that +accounts for both magnitude and direction when determining the distance +between vectors. L2 distance has a range of [0, ∞). + +"cosine" - Cosine distance. Cosine distance is a distance metric +calculated from the cosine similarity between two vectors. Cosine +similarity is a measure of similarity between two non-zero vectors of an +inner product space. It is defined to equal the cosine of the angle +between them. Unlike L2, the cosine distance is not affected by the +magnitude of the vectors. Cosine distance has a range of [0, 2]. + +"dot" - Dot product. Dot distance is the dot product of two vectors. Dot +distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their +L2 norm is 1), then dot distance is equivalent to the cosine distance. + +*** + +### efConstruction? + +```ts +optional efConstruction: number; +``` + +The number of candidates to evaluate during the construction of the HNSW graph. + +The default value is 300. + +This value controls the tradeoff between build speed and accuracy. +The higher the value the more accurate the build but the slower it will be. +150 to 300 is the typical range. 100 is a minimum for good quality search +results. In most cases, there is no benefit to setting this higher than 500. 
+This value should be set to a value that is not less than `ef` in the search phase. + +*** + +### m? + +```ts +optional m: number; +``` + +The number of neighbors to select for each vector in the HNSW graph. + +The default value is 20. + +This value controls the tradeoff between search speed and accuracy. +The higher the value the more accurate the search but the slower it will be. + +*** + +### maxIterations? + +```ts +optional maxIterations: number; +``` + +Max iterations to train kmeans. + +The default value is 50. + +When training an IVF index we use kmeans to calculate the partitions. This parameter +controls how many iterations of kmeans to run. + +Increasing this might improve the quality of the index but in most cases the parameter +is unused because kmeans will converge with fewer iterations. The parameter is only +used in cases where kmeans does not appear to converge. In those cases it is unlikely +that setting this larger will lead to the index converging anyways. + +*** + +### numPartitions? + +```ts +optional numPartitions: number; +``` + +The number of IVF partitions to create. + +For HNSW, we recommend a small number of partitions. Setting this to 1 works +well for most tables. For very large tables, training just one HNSW graph +will require too much memory. Each partition becomes its own HNSW graph, so +setting this value higher reduces the peak memory use of training. + +*** + +### sampleRate? + +```ts +optional sampleRate: number; +``` + +The rate used to calculate the number of training vectors for kmeans. + +Default value is 256. + +When an IVF index is trained, we need to calculate partitions. These are groups +of vectors that are similar to each other. To do this we use an algorithm called kmeans. + +Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a +random sample of the data. This parameter controls the size of the sample. 
The total +number of vectors used to train the index is `sample_rate * num_partitions`. + +Increasing this value might improve the quality of the index but in most cases the +default should be sufficient. diff --git a/docs/src/js/interfaces/OpenTableOptions.md b/docs/src/js/interfaces/OpenTableOptions.md new file mode 100644 index 00000000..dab6294d --- /dev/null +++ b/docs/src/js/interfaces/OpenTableOptions.md @@ -0,0 +1,40 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / OpenTableOptions + +# Interface: OpenTableOptions + +## Properties + +### indexCacheSize? + +```ts +optional indexCacheSize: number; +``` + +Set the size of the index cache, specified as a number of entries + +The exact meaning of an "entry" will depend on the type of index: +- IVF: there is one entry for each IVF partition +- BTREE: there is one entry for the entire index + +This cache applies to the entire opened table, across all indices. +Setting this value higher will increase performance on larger datasets +at the expense of more RAM + +*** + +### storageOptions? + +```ts +optional storageOptions: Record; +``` + +Configuration for object storage. + +Options already set on the connection will be inherited by the table, +but can be overridden here. 
+ +The available options are described at https://lancedb.github.io/lancedb/guides/storage/ diff --git a/docs/src/js/interfaces/OptimizeStats.md b/docs/src/js/interfaces/OptimizeStats.md new file mode 100644 index 00000000..40a750f0 --- /dev/null +++ b/docs/src/js/interfaces/OptimizeStats.md @@ -0,0 +1,29 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / OptimizeStats + +# Interface: OptimizeStats + +Statistics about an optimize operation + +## Properties + +### compaction + +```ts +compaction: CompactionStats; +``` + +Statistics about the compaction operation + +*** + +### prune + +```ts +prune: RemovalStats; +``` + +Statistics about the removal operation diff --git a/docs/src/js/interfaces/QueryExecutionOptions.md b/docs/src/js/interfaces/QueryExecutionOptions.md new file mode 100644 index 00000000..dd3495bd --- /dev/null +++ b/docs/src/js/interfaces/QueryExecutionOptions.md @@ -0,0 +1,22 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / QueryExecutionOptions + +# Interface: QueryExecutionOptions + +Options that control the behavior of a particular query execution + +## Properties + +### maxBatchLength? + +```ts +optional maxBatchLength: number; +``` + +The maximum number of rows to return in a single batch + +Batches may have fewer rows if the underlying data is stored +in smaller chunks. 
diff --git a/docs/src/js/interfaces/RemovalStats.md b/docs/src/js/interfaces/RemovalStats.md new file mode 100644 index 00000000..9015e8df --- /dev/null +++ b/docs/src/js/interfaces/RemovalStats.md @@ -0,0 +1,29 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / RemovalStats + +# Interface: RemovalStats + +Statistics about a cleanup operation + +## Properties + +### bytesRemoved + +```ts +bytesRemoved: number; +``` + +The number of bytes removed + +*** + +### oldVersionsRemoved + +```ts +oldVersionsRemoved: number; +``` + +The number of old versions removed diff --git a/docs/src/js/interfaces/Version.md b/docs/src/js/interfaces/Version.md new file mode 100644 index 00000000..a4aceb9b --- /dev/null +++ b/docs/src/js/interfaces/Version.md @@ -0,0 +1,31 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / Version + +# Interface: Version + +## Properties + +### metadata + +```ts +metadata: Record; +``` + +*** + +### timestamp + +```ts +timestamp: Date; +``` + +*** + +### version + +```ts +version: number; +``` diff --git a/docs/src/js/interfaces/WriteOptions.md b/docs/src/js/interfaces/WriteOptions.md deleted file mode 100644 index 09c78e76..00000000 --- a/docs/src/js/interfaces/WriteOptions.md +++ /dev/null @@ -1,19 +0,0 @@ -[**@lancedb/lancedb**](../README.md) • **Docs** - -*** - -[@lancedb/lancedb](../globals.md) / WriteOptions - -# Interface: WriteOptions - -Write options when creating a Table. - -## Properties - -### mode? - -```ts -optional mode: WriteMode; -``` - -Write mode for writing to a table. 
diff --git a/docs/src/js/namespaces/embedding/README.md b/docs/src/js/namespaces/embedding/README.md index 17d05fb8..157018e1 100644 --- a/docs/src/js/namespaces/embedding/README.md +++ b/docs/src/js/namespaces/embedding/README.md @@ -17,6 +17,14 @@ ### Interfaces - [EmbeddingFunctionConfig](interfaces/EmbeddingFunctionConfig.md) +- [EmbeddingFunctionConstructor](interfaces/EmbeddingFunctionConstructor.md) +- [EmbeddingFunctionCreate](interfaces/EmbeddingFunctionCreate.md) +- [FieldOptions](interfaces/FieldOptions.md) +- [FunctionOptions](interfaces/FunctionOptions.md) + +### Type Aliases + +- [CreateReturnType](type-aliases/CreateReturnType.md) ### Functions diff --git a/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md b/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md index 7e09e691..24c54915 100644 --- a/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md +++ b/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md @@ -16,7 +16,7 @@ An embedding function that automatically creates vector representation for a giv • **T** = `any` -• **M** *extends* `FunctionOptions` = `FunctionOptions` +• **M** *extends* [`FunctionOptions`](../interfaces/FunctionOptions.md) = [`FunctionOptions`](../interfaces/FunctionOptions.md) ## Constructors @@ -118,16 +118,16 @@ sourceField is used in combination with `LanceSchema` to provide a declarative d #### Parameters -* **optionsOrDatatype**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>> +* **optionsOrDatatype**: `DataType`<`Type`, `any`> \| `Partial`<[`FieldOptions`](../interfaces/FieldOptions.md)<`DataType`<`Type`, `any`>>> The options for the field or the datatype #### Returns -[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>] +[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>] #### See 
-lancedb.LanceSchema +[LanceSchema](../functions/LanceSchema.md) *** @@ -178,12 +178,13 @@ vectorField is used in combination with `LanceSchema` to provide a declarative d #### Parameters -* **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>> +* **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<[`FieldOptions`](../interfaces/FieldOptions.md)<`DataType`<`Type`, `any`>>> + The options for the field #### Returns -[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>] +[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>] #### See -lancedb.LanceSchema +[LanceSchema](../functions/LanceSchema.md) diff --git a/docs/src/js/namespaces/embedding/classes/EmbeddingFunctionRegistry.md b/docs/src/js/namespaces/embedding/classes/EmbeddingFunctionRegistry.md index 099d07ce..e01078ab 100644 --- a/docs/src/js/namespaces/embedding/classes/EmbeddingFunctionRegistry.md +++ b/docs/src/js/namespaces/embedding/classes/EmbeddingFunctionRegistry.md @@ -51,7 +51,7 @@ Fetch an embedding function by name #### Type Parameters -• **T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)<`unknown`, `FunctionOptions`> +• **T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)<`unknown`, [`FunctionOptions`](../interfaces/FunctionOptions.md)> #### Parameters @@ -60,7 +60,7 @@ Fetch an embedding function by name #### Returns -`undefined` \| `EmbeddingFunctionCreate`<`T`> +`undefined` \| [`EmbeddingFunctionCreate`](../interfaces/EmbeddingFunctionCreate.md)<`T`> *** @@ -104,7 +104,7 @@ Register an embedding function #### Type Parameters -• **T** *extends* `EmbeddingFunctionConstructor`<[`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>> = `EmbeddingFunctionConstructor`<[`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>> +• **T** 
*extends* [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)<[`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>> = [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)<[`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>> #### Parameters diff --git a/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md b/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md index 7a8aa145..2cc13bef 100644 --- a/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md +++ b/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md @@ -14,7 +14,7 @@ an abstract class for implementing embedding functions that take text as input ## Type Parameters -• **M** *extends* `FunctionOptions` = `FunctionOptions` +• **M** *extends* [`FunctionOptions`](../interfaces/FunctionOptions.md) = [`FunctionOptions`](../interfaces/FunctionOptions.md) ## Constructors @@ -158,11 +158,11 @@ sourceField is used in combination with `LanceSchema` to provide a declarative d #### Returns -[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>] +[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>] #### See -lancedb.LanceSchema +[LanceSchema](../functions/LanceSchema.md) #### Overrides @@ -221,15 +221,16 @@ vectorField is used in combination with `LanceSchema` to provide a declarative d #### Parameters -* **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>> +* **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<[`FieldOptions`](../interfaces/FieldOptions.md)<`DataType`<`Type`, `any`>>> + The options for the field #### Returns -[`DataType`<`Type`, `any`>, `Map`<`string`, 
[`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>] +[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>] #### See -lancedb.LanceSchema +[LanceSchema](../functions/LanceSchema.md) #### Inherited from diff --git a/docs/src/js/namespaces/embedding/functions/LanceSchema.md b/docs/src/js/namespaces/embedding/functions/LanceSchema.md index 31aa7e94..22c430cf 100644 --- a/docs/src/js/namespaces/embedding/functions/LanceSchema.md +++ b/docs/src/js/namespaces/embedding/functions/LanceSchema.md @@ -14,7 +14,7 @@ Create a schema with embedding functions. ## Parameters -* **fields**: `Record`<`string`, `object` \| [`object`, `Map`<`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, `FunctionOptions`>>]> +* **fields**: `Record`<`string`, `object` \| [`object`, `Map`<`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>]> ## Returns diff --git a/docs/src/js/namespaces/embedding/functions/register.md b/docs/src/js/namespaces/embedding/functions/register.md index 520357a4..4f5f4d9e 100644 --- a/docs/src/js/namespaces/embedding/functions/register.md +++ b/docs/src/js/namespaces/embedding/functions/register.md @@ -20,7 +20,7 @@ function register(name?): (ctor) => any ### Parameters -* **ctor**: `EmbeddingFunctionConstructor`<[`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, `FunctionOptions`>> +* **ctor**: [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)<[`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>> ### Returns diff --git a/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionConstructor.md b/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionConstructor.md new file mode 100644 index 00000000..bdab1eab --- /dev/null +++ 
b/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionConstructor.md @@ -0,0 +1,27 @@ +[**@lancedb/lancedb**](../../../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / EmbeddingFunctionConstructor + +# Interface: EmbeddingFunctionConstructor<T> + +## Type Parameters + +• **T** *extends* [`EmbeddingFunction`](../classes/EmbeddingFunction.md) = [`EmbeddingFunction`](../classes/EmbeddingFunction.md) + +## Constructors + +### new EmbeddingFunctionConstructor() + +```ts +new EmbeddingFunctionConstructor(modelOptions?): T +``` + +#### Parameters + +* **modelOptions?**: `T`\[`"TOptions"`\] + +#### Returns + +`T` diff --git a/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionCreate.md b/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionCreate.md new file mode 100644 index 00000000..3c751e26 --- /dev/null +++ b/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionCreate.md @@ -0,0 +1,27 @@ +[**@lancedb/lancedb**](../../../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / EmbeddingFunctionCreate + +# Interface: EmbeddingFunctionCreate<T> + +## Type Parameters + +• **T** *extends* [`EmbeddingFunction`](../classes/EmbeddingFunction.md) + +## Methods + +### create() + +```ts +create(options?): CreateReturnType +``` + +#### Parameters + +* **options?**: `T`\[`"TOptions"`\] + +#### Returns + +[`CreateReturnType`](../type-aliases/CreateReturnType.md)<`T`> diff --git a/docs/src/js/namespaces/embedding/interfaces/FieldOptions.md b/docs/src/js/namespaces/embedding/interfaces/FieldOptions.md new file mode 100644 index 00000000..4dd9eda4 --- /dev/null +++ b/docs/src/js/namespaces/embedding/interfaces/FieldOptions.md @@ -0,0 +1,27 @@ +[**@lancedb/lancedb**](../../../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / FieldOptions + +# Interface: FieldOptions<T> + +## Type Parameters + +• **T** 
*extends* `DataType` = `DataType` + +## Properties + +### datatype + +```ts +datatype: T; +``` + +*** + +### dims? + +```ts +optional dims: number; +``` diff --git a/docs/src/js/namespaces/embedding/interfaces/FunctionOptions.md b/docs/src/js/namespaces/embedding/interfaces/FunctionOptions.md new file mode 100644 index 00000000..a4a83d25 --- /dev/null +++ b/docs/src/js/namespaces/embedding/interfaces/FunctionOptions.md @@ -0,0 +1,13 @@ +[**@lancedb/lancedb**](../../../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / FunctionOptions + +# Interface: FunctionOptions + +Options for a given embedding function + +## Indexable + + \[`key`: `string`\]: `any` diff --git a/docs/src/js/namespaces/embedding/type-aliases/CreateReturnType.md b/docs/src/js/namespaces/embedding/type-aliases/CreateReturnType.md new file mode 100644 index 00000000..789082be --- /dev/null +++ b/docs/src/js/namespaces/embedding/type-aliases/CreateReturnType.md @@ -0,0 +1,15 @@ +[**@lancedb/lancedb**](../../../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / CreateReturnType + +# Type Alias: CreateReturnType<T> + +```ts +type CreateReturnType: T extends object ? Promise : T; +``` + +## Type Parameters + +• **T** diff --git a/docs/src/js/namespaces/rerankers/classes/RRFReranker.md b/docs/src/js/namespaces/rerankers/classes/RRFReranker.md index 7beb9aff..d054b7f0 100644 --- a/docs/src/js/namespaces/rerankers/classes/RRFReranker.md +++ b/docs/src/js/namespaces/rerankers/classes/RRFReranker.md @@ -8,24 +8,6 @@ Reranks the results using the Reciprocal Rank Fusion (RRF) algorithm. 
-Internally this uses the Rust implementation - -## Constructors - -### new RRFReranker() - -```ts -new RRFReranker(inner): RRFReranker -``` - -#### Parameters - -* **inner**: `RrfReranker` - -#### Returns - -[`RRFReranker`](RRFReranker.md) - ## Methods ### rerankHybrid() diff --git a/docs/src/js/type-aliases/DataLike.md b/docs/src/js/type-aliases/DataLike.md new file mode 100644 index 00000000..f9a343ed --- /dev/null +++ b/docs/src/js/type-aliases/DataLike.md @@ -0,0 +1,11 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / DataLike + +# Type Alias: DataLike + +```ts +type DataLike: Data | object; +``` diff --git a/docs/src/js/type-aliases/FieldLike.md b/docs/src/js/type-aliases/FieldLike.md new file mode 100644 index 00000000..052d1070 --- /dev/null +++ b/docs/src/js/type-aliases/FieldLike.md @@ -0,0 +1,11 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / FieldLike + +# Type Alias: FieldLike + +```ts +type FieldLike: Field | object; +``` diff --git a/docs/src/js/type-aliases/IntoSql.md b/docs/src/js/type-aliases/IntoSql.md new file mode 100644 index 00000000..276ca263 --- /dev/null +++ b/docs/src/js/type-aliases/IntoSql.md @@ -0,0 +1,19 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / IntoSql + +# Type Alias: IntoSql + +```ts +type IntoSql: + | string + | number + | boolean + | null + | Date + | ArrayBufferLike + | Buffer + | IntoSql[]; +``` diff --git a/docs/src/js/type-aliases/IntoVector.md b/docs/src/js/type-aliases/IntoVector.md new file mode 100644 index 00000000..813d3cdc --- /dev/null +++ b/docs/src/js/type-aliases/IntoVector.md @@ -0,0 +1,11 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / IntoVector + +# Type Alias: IntoVector + +```ts +type IntoVector: Float32Array | Float64Array | number[] | Promise; +``` diff --git a/docs/src/js/type-aliases/RecordBatchLike.md 
b/docs/src/js/type-aliases/RecordBatchLike.md new file mode 100644 index 00000000..2d7d48bb --- /dev/null +++ b/docs/src/js/type-aliases/RecordBatchLike.md @@ -0,0 +1,11 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / RecordBatchLike + +# Type Alias: RecordBatchLike + +```ts +type RecordBatchLike: RecordBatch | object; +``` diff --git a/docs/src/js/type-aliases/SchemaLike.md b/docs/src/js/type-aliases/SchemaLike.md new file mode 100644 index 00000000..31b3d16a --- /dev/null +++ b/docs/src/js/type-aliases/SchemaLike.md @@ -0,0 +1,11 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / SchemaLike + +# Type Alias: SchemaLike + +```ts +type SchemaLike: Schema | object; +``` diff --git a/docs/src/js/type-aliases/TableLike.md b/docs/src/js/type-aliases/TableLike.md new file mode 100644 index 00000000..b38a111b --- /dev/null +++ b/docs/src/js/type-aliases/TableLike.md @@ -0,0 +1,11 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / TableLike + +# Type Alias: TableLike + +```ts +type TableLike: ArrowTable | object; +``` diff --git a/nodejs/README.md b/nodejs/README.md index 6d514de3..4446855d 100644 --- a/nodejs/README.md +++ b/nodejs/README.md @@ -32,7 +32,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray(); console.log(results); ``` -The [quickstart](../basic.md) contains a more complete example. +The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example. 
## Development diff --git a/nodejs/lancedb/arrow.ts b/nodejs/lancedb/arrow.ts index 48c60e19..cedd08ce 100644 --- a/nodejs/lancedb/arrow.ts +++ b/nodejs/lancedb/arrow.ts @@ -257,6 +257,7 @@ export class MakeArrowTableOptions { * - Record => Struct * - Array => List * @example + * ```ts * import { fromTableToBuffer, makeArrowTable } from "../arrow"; * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow"; * @@ -278,43 +279,41 @@ export class MakeArrowTableOptions { * names and data types. * * ```ts - * * const schema = new Schema([ - new Field("a", new Float64()), - new Field("b", new Float64()), - new Field( - "vector", - new FixedSizeList(3, new Field("item", new Float32())) - ), - ]); - const table = makeArrowTable([ - { a: 1, b: 2, vector: [1, 2, 3] }, - { a: 4, b: 5, vector: [4, 5, 6] }, - { a: 7, b: 8, vector: [7, 8, 9] }, - ]); - assert.deepEqual(table.schema, schema); + * new Field("a", new Float64()), + * new Field("b", new Float64()), + * new Field( + * "vector", + * new FixedSizeList(3, new Field("item", new Float32())) + * ), + * ]); + * const table = makeArrowTable([ + * { a: 1, b: 2, vector: [1, 2, 3] }, + * { a: 4, b: 5, vector: [4, 5, 6] }, + * { a: 7, b: 8, vector: [7, 8, 9] }, + * ]); + * assert.deepEqual(table.schema, schema); * ``` * * You can specify the vector column types and names using the options as well * - * ```typescript - * + * ```ts * const schema = new Schema([ - new Field('a', new Float64()), - new Field('b', new Float64()), - new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))), - new Field('vec2', new FixedSizeList(3, new Field('item', new Float16()))) - ]); + * new Field('a', new Float64()), + * new Field('b', new Float64()), + * new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))), + * new Field('vec2', new FixedSizeList(3, new Field('item', new Float16()))) + * ]); * const table = makeArrowTable([ - { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] }, - { a: 
4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] }, - { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] } - ], { - vectorColumns: { - vec1: { type: new Float16() }, - vec2: { type: new Float16() } - } - } + * { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] }, + * { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] }, + * { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] } + * ], { + * vectorColumns: { + * vec1: { type: new Float16() }, + * vec2: { type: new Float16() } + * } + * } * assert.deepEqual(table.schema, schema) * ``` */ diff --git a/nodejs/lancedb/connection.ts b/nodejs/lancedb/connection.ts index cea2c380..da3d1d70 100644 --- a/nodejs/lancedb/connection.ts +++ b/nodejs/lancedb/connection.ts @@ -1,10 +1,23 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors -import { Data, Schema, SchemaLike, TableLike } from "./arrow"; -import { fromTableToBuffer, makeEmptyTable } from "./arrow"; +import { + Data, + Schema, + SchemaLike, + TableLike, + fromTableToStreamBuffer, + isArrowTable, + makeArrowTable, +} from "./arrow"; +import { + Table as ArrowTable, + fromTableToBuffer, + makeEmptyTable, +} from "./arrow"; import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry"; import { Connection as LanceDbConnection } from "./native"; +import { sanitizeTable } from "./sanitize"; import { LocalTable, Table } from "./table"; export interface CreateTableOptions { @@ -116,6 +129,7 @@ export interface TableNamesOptions { * * Any created tables are independent and will continue to work even if * the underlying connection has been closed. 
+ * @hideconstructor */ export abstract class Connection { [Symbol.for("nodejs.util.inspect.custom")](): string { @@ -203,9 +217,11 @@ export abstract class Connection { abstract dropTable(name: string): Promise; } +/** @hideconstructor */ export class LocalConnection extends Connection { readonly inner: LanceDbConnection; + /** @hidden */ constructor(inner: LanceDbConnection) { super(); this.inner = inner; @@ -255,7 +271,7 @@ export class LocalConnection extends Connection { if (data === undefined) { throw new Error("data is required"); } - const { buf, mode } = await Table.parseTableData(data, options); + const { buf, mode } = await parseTableData(data, options); let dataStorageVersion = "stable"; if (options?.dataStorageVersion !== undefined) { dataStorageVersion = options.dataStorageVersion; @@ -357,3 +373,38 @@ function camelToSnakeCase(camel: string): string { } return result; } + +async function parseTableData( + data: Record[] | TableLike, + options?: Partial, + streaming = false, +) { + let mode: string = options?.mode ?? "create"; + const existOk = options?.existOk ?? 
false; + + if (mode === "create" && existOk) { + mode = "exist_ok"; + } + + let table: ArrowTable; + if (isArrowTable(data)) { + table = sanitizeTable(data); + } else { + table = makeArrowTable(data as Record[], options); + } + if (streaming) { + const buf = await fromTableToStreamBuffer( + table, + options?.embeddingFunction, + options?.schema, + ); + return { buf, mode }; + } else { + const buf = await fromTableToBuffer( + table, + options?.embeddingFunction, + options?.schema, + ); + return { buf, mode }; + } +} diff --git a/nodejs/lancedb/embedding/embedding_function.ts b/nodejs/lancedb/embedding/embedding_function.ts index b1fa7fd4..4d00eb29 100644 --- a/nodejs/lancedb/embedding/embedding_function.ts +++ b/nodejs/lancedb/embedding/embedding_function.ts @@ -78,7 +78,7 @@ export abstract class EmbeddingFunction< * * @param optionsOrDatatype - The options for the field or the datatype * - * @see {@link lancedb.LanceSchema} + * @see {@link LanceSchema} */ sourceField( optionsOrDatatype: Partial | DataType, @@ -100,9 +100,9 @@ export abstract class EmbeddingFunction< /** * vectorField is used in combination with `LanceSchema` to provide a declarative data model * - * @param options - The options for the field + * @param optionsOrDatatype - The options for the field * - * @see {@link lancedb.LanceSchema} + * @see {@link LanceSchema} */ vectorField( optionsOrDatatype?: Partial | DataType, diff --git a/nodejs/lancedb/embedding/index.ts b/nodejs/lancedb/embedding/index.ts index 723e1e9c..d0ffaec0 100644 --- a/nodejs/lancedb/embedding/index.ts +++ b/nodejs/lancedb/embedding/index.ts @@ -6,7 +6,13 @@ import { sanitizeType } from "../sanitize"; import { EmbeddingFunction } from "./embedding_function"; import { EmbeddingFunctionConfig, getRegistry } from "./registry"; -export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function"; +export { + FieldOptions, + EmbeddingFunction, + TextEmbeddingFunction, + FunctionOptions, + EmbeddingFunctionConstructor, +} 
from "./embedding_function"; export * from "./registry"; diff --git a/nodejs/lancedb/embedding/registry.ts b/nodejs/lancedb/embedding/registry.ts index 31cc29c0..2f33ac91 100644 --- a/nodejs/lancedb/embedding/registry.ts +++ b/nodejs/lancedb/embedding/registry.ts @@ -7,11 +7,11 @@ import { } from "./embedding_function"; import "reflect-metadata"; -type CreateReturnType = T extends { init: () => Promise } +export type CreateReturnType = T extends { init: () => Promise } ? Promise : T; -interface EmbeddingFunctionCreate { +export interface EmbeddingFunctionCreate { create(options?: T["TOptions"]): CreateReturnType; } @@ -33,8 +33,6 @@ export class EmbeddingFunctionRegistry { /** * Register an embedding function - * @param name The name of the function - * @param func The function to register * @throws Error if the function is already registered */ register< diff --git a/nodejs/lancedb/index.ts b/nodejs/lancedb/index.ts index 1102695f..66500c68 100644 --- a/nodejs/lancedb/index.ts +++ b/nodejs/lancedb/index.ts @@ -13,8 +13,6 @@ import { } from "./native.js"; export { - WriteOptions, - WriteMode, AddColumnsSql, ColumnAlteration, ConnectionOptions, @@ -23,6 +21,9 @@ export { ClientConfig, TimeoutConfig, RetryConfig, + OptimizeStats, + CompactionStats, + RemovalStats, } from "./native.js"; export { @@ -36,6 +37,7 @@ export { Connection, CreateTableOptions, TableNamesOptions, + OpenTableOptions, } from "./connection"; export { @@ -43,15 +45,41 @@ export { Query, QueryBase, VectorQuery, + QueryExecutionOptions, + FullTextSearchOptions, RecordBatchIterator, } from "./query"; -export { Index, IndexOptions, IvfPqOptions } from "./indices"; +export { + Index, + IndexOptions, + IvfPqOptions, + HnswPqOptions, + HnswSqOptions, + FtsOptions, +} from "./indices"; -export { Table, AddDataOptions, UpdateOptions, OptimizeOptions } from "./table"; +export { + Table, + AddDataOptions, + UpdateOptions, + OptimizeOptions, + Version, +} from "./table"; + +export { MergeInsertBuilder } from 
"./merge"; export * as embedding from "./embedding"; export * as rerankers from "./rerankers"; +export { + SchemaLike, + TableLike, + FieldLike, + RecordBatchLike, + DataLike, + IntoVector, +} from "./arrow"; +export { IntoSql } from "./util"; /** * Connect to a LanceDB instance at the given URI. @@ -64,6 +92,7 @@ export * as rerankers from "./rerankers"; * @param {string} uri - The uri of the database. If the database uri starts * with `db://` then it connects to a remote database. * @see {@link ConnectionOptions} for more details on the URI format. + * @param options - The options to use when connecting to the database * @example * ```ts * const conn = await connect("/path/to/database"); @@ -78,7 +107,7 @@ export * as rerankers from "./rerankers"; */ export async function connect( uri: string, - opts?: Partial, + options?: Partial, ): Promise; /** * Connect to a LanceDB instance at the given URI. @@ -99,17 +128,17 @@ export async function connect( * ``` */ export async function connect( - opts: Partial & { uri: string }, + options: Partial & { uri: string }, ): Promise; export async function connect( uriOrOptions: string | (Partial & { uri: string }), - opts: Partial = {}, + options: Partial = {}, ): Promise { let uri: string | undefined; if (typeof uriOrOptions !== "string") { - const { uri: uri_, ...options } = uriOrOptions; + const { uri: uri_, ...opts } = uriOrOptions; uri = uri_; - opts = options; + options = opts; } else { uri = uriOrOptions; } @@ -118,10 +147,10 @@ export async function connect( throw new Error("uri is required"); } - opts = (opts as ConnectionOptions) ?? {}; - (opts).storageOptions = cleanseStorageOptions( - (opts).storageOptions, + options = (options as ConnectionOptions) ?? 
{}; + (options).storageOptions = cleanseStorageOptions( + (options).storageOptions, ); - const nativeConn = await LanceDbConnection.new(uri, opts); + const nativeConn = await LanceDbConnection.new(uri, options); return new LocalConnection(nativeConn); } diff --git a/nodejs/lancedb/indices.ts b/nodejs/lancedb/indices.ts index f853770d..7c27e42e 100644 --- a/nodejs/lancedb/indices.ts +++ b/nodejs/lancedb/indices.ts @@ -481,8 +481,6 @@ export class Index { * The results of a full text search are ordered by relevance measured by BM25. * * You can combine filters with full text search. - * - * For now, the full text search index only supports English, and doesn't support phrase search. */ static fts(options?: Partial) { return new Index( diff --git a/nodejs/lancedb/query.ts b/nodejs/lancedb/query.ts index dad825f9..2c1a1b05 100644 --- a/nodejs/lancedb/query.ts +++ b/nodejs/lancedb/query.ts @@ -93,10 +93,19 @@ export interface FullTextSearchOptions { columns?: string | string[]; } -/** Common methods supported by all query types */ +/** Common methods supported by all query types + * + * @see {@link Query} + * @see {@link VectorQuery} + * + * @hideconstructor + */ export class QueryBase implements AsyncIterable { + /** + * @hidden + */ protected constructor( protected inner: NativeQueryType | Promise, ) { @@ -104,6 +113,9 @@ export class QueryBase } // call a function on the inner (either a promise or the actual object) + /** + * @hidden + */ protected doCall(fn: (inner: NativeQueryType) => void) { if (this.inner instanceof Promise) { this.inner = this.inner.then((inner) => { @@ -132,7 +144,7 @@ export class QueryBase } /** * A filter statement to be applied to this query. - * @alias where + * @see where * @deprecated Use `where` instead */ filter(predicate: string): this { @@ -235,7 +247,7 @@ export class QueryBase * Skip searching un-indexed data. This can make search faster, but will miss * any data that is not yet indexed. 
* - * Use {@link lancedb.Table#optimize} to index all un-indexed data. + * Use {@link Table#optimize} to index all un-indexed data. */ fastSearch(): this { this.doCall((inner: NativeQueryType) => inner.fastSearch()); @@ -254,6 +266,9 @@ export class QueryBase return this; } + /** + * @hidden + */ protected nativeExecute( options?: Partial, ): Promise { @@ -281,6 +296,9 @@ export class QueryBase return new RecordBatchIterator(this.nativeExecute(options)); } + /** + * @hidden + */ // biome-ignore lint/suspicious/noExplicitAny: skip [Symbol.asyncIterator](): AsyncIterator> { const promise = this.nativeExecute(); @@ -343,8 +361,15 @@ export interface ExecutableQuery {} * A builder used to construct a vector search * * This builder can be reused to execute the query many times. + * + * @see {@link Query#nearestTo} + * + * @hideconstructor */ export class VectorQuery extends QueryBase { + /** + * @hidden + */ constructor(inner: NativeVectorQuery | Promise) { super(inner); } @@ -570,8 +595,16 @@ export class VectorQuery extends QueryBase { } } -/** A builder for LanceDB queries. */ +/** A builder for LanceDB queries. + * + * @see {@link Table#query}, {@link Table#search} + * + * @hideconstructor + */ export class Query extends QueryBase { + /** + * @hidden + */ constructor(tbl: NativeTable) { super(tbl.query()); } diff --git a/nodejs/lancedb/rerankers/rrf.ts b/nodejs/lancedb/rerankers/rrf.ts index 1d89c076..654e6130 100644 --- a/nodejs/lancedb/rerankers/rrf.ts +++ b/nodejs/lancedb/rerankers/rrf.ts @@ -8,11 +8,12 @@ import { RrfReranker as NativeRRFReranker } from "../native"; /** * Reranks the results using the Reciprocal Rank Fusion (RRF) algorithm. 
* - * Internally this uses the Rust implementation + * @hideconstructor */ export class RRFReranker { private inner: NativeRRFReranker; + /** @ignore */ constructor(inner: NativeRRFReranker) { this.inner = inner; } diff --git a/nodejs/lancedb/table.ts b/nodejs/lancedb/table.ts index 4f6115e4..c0ac1d10 100644 --- a/nodejs/lancedb/table.ts +++ b/nodejs/lancedb/table.ts @@ -6,15 +6,9 @@ import { Data, IntoVector, Schema, - TableLike, fromDataToBuffer, - fromTableToBuffer, - fromTableToStreamBuffer, - isArrowTable, - makeArrowTable, tableFromIPC, } from "./arrow"; -import { CreateTableOptions } from "./connection"; import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry"; import { IndexOptions } from "./indices"; @@ -28,7 +22,6 @@ import { Table as _NativeTable, } from "./native"; import { Query, VectorQuery } from "./query"; -import { sanitizeTable } from "./sanitize"; import { IntoSql, toSQL } from "./util"; export { IndexConfig } from "./native"; @@ -91,8 +84,14 @@ export interface Version { * can call the `close` method. Once the Table is closed, it cannot be used for any * further operations. * + * Tables are created using the methods {@link Connection#createTable} + * and {@link Connection#createEmptyTable}. Existing tables are opened + * using {@link Connection#openTable}. + * * Closing a table is optional. It not closed, it will be closed when it is garbage * collected. + * + * @hideconstructor */ export abstract class Table { [Symbol.for("nodejs.util.inspect.custom")](): string { @@ -190,8 +189,9 @@ export abstract class Table { * Indices on scalar columns will speed up filtering (in both * vector and non-vector searches) * - * @note We currently don't support custom named indexes, - * The index name will always be `${column}_idx` + * We currently don't support custom named indexes. + * The index name will always be `${column}_idx`. 
+ * * @example * // If the column has a vector (fixed size list) data type then * // an IvfPq vector index will be created. @@ -221,7 +221,7 @@ export abstract class Table { * * @param name The name of the index. * - * @note This does not delete the index from disk, it just removes it from the table. + * This does not delete the index from disk, it just removes it from the table. * To delete the index, run {@link Table#optimize} after dropping the index. * * Use {@link Table.listIndices} to find the names of the indices. @@ -432,41 +432,6 @@ export abstract class Table { * Use {@link Table.listIndices} to find the names of the indices. */ abstract indexStats(name: string): Promise; - - static async parseTableData( - data: Record[] | TableLike, - options?: Partial, - streaming = false, - ) { - let mode: string = options?.mode ?? "create"; - const existOk = options?.existOk ?? false; - - if (mode === "create" && existOk) { - mode = "exist_ok"; - } - - let table: ArrowTable; - if (isArrowTable(data)) { - table = sanitizeTable(data); - } else { - table = makeArrowTable(data as Record[], options); - } - if (streaming) { - const buf = await fromTableToStreamBuffer( - table, - options?.embeddingFunction, - options?.schema, - ); - return { buf, mode }; - } else { - const buf = await fromTableToBuffer( - table, - options?.embeddingFunction, - options?.schema, - ); - return { buf, mode }; - } - } } export class LocalTable extends Table { diff --git a/nodejs/package.json b/nodejs/package.json index d6e32cd1..3ee37052 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -77,7 +77,7 @@ "build": "npm run build:debug && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts && shx cp lancedb/*.node dist/", "build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts", "lint-ci": "biome ci .", - "docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts", + "docs": "typedoc --plugin typedoc-plugin-markdown 
--treatWarningsAsErrors --out ../docs/src/js lancedb/index.ts", "postdocs": "node typedoc_post_process.js", "lint": "biome check . && biome format .", "lint-fix": "biome check --write . && biome format --write .", diff --git a/nodejs/src/lib.rs b/nodejs/src/lib.rs index 5e8c3950..7674e370 100644 --- a/nodejs/src/lib.rs +++ b/nodejs/src/lib.rs @@ -49,21 +49,6 @@ pub struct ConnectionOptions { pub host_override: Option, } -/// Write mode for writing a table. -#[napi(string_enum)] -pub enum WriteMode { - Create, - Append, - Overwrite, -} - -/// Write options when creating a Table. -#[napi(object)] -pub struct WriteOptions { - /// Write mode for writing to a table. - pub mode: Option, -} - #[napi(object)] pub struct OpenTableOptions { pub storage_options: Option>, diff --git a/nodejs/typedoc.json b/nodejs/typedoc.json index baf2d4ef..72784657 100644 --- a/nodejs/typedoc.json +++ b/nodejs/typedoc.json @@ -1,11 +1,9 @@ { "intentionallyNotExported": [ - "lancedb/native.d.ts:Connection", - "lancedb/native.d.ts:Index", "lancedb/native.d.ts:Query", "lancedb/native.d.ts:VectorQuery", "lancedb/native.d.ts:RecordBatchIterator", - "lancedb/native.d.ts:Table" + "lancedb/native.d.ts:NativeMergeInsertBuilder" ], "useHTMLEncodedBrackets": true, "useCodeBlocks": true, diff --git a/nodejs/typedoc_post_process.js b/nodejs/typedoc_post_process.js index 62070ceb..e23c91e1 100644 --- a/nodejs/typedoc_post_process.js +++ b/nodejs/typedoc_post_process.js @@ -40,23 +40,28 @@ function processDirectory(directoryPath) { function processContents(contents) { // This changes the parameters section to put the parameter description on // the same line as the bullet with the parameter name and type. 
- return contents.replace(/(## Parameters[\s\S]*?)(?=##|$)/g, (match) => { - let lines = match - .split("\n") - .map((line) => line.trim()) + return ( + contents + .replace(/(## Parameters[\s\S]*?)(?=##|$)/g, (match) => { + let lines = match + .split("\n") + .map((line) => line.trim()) - .filter((line) => line !== "") - .map((line) => { - if (line.startsWith("##")) { - return line; - } else if (line.startsWith("•")) { - return "\n*" + line.substring(1); - } else { - return " " + line; - } - }); - return lines.join("\n") + "\n\n"; - }); + .filter((line) => line !== "") + .map((line) => { + if (line.startsWith("##")) { + return line; + } else if (line.startsWith("•")) { + return "\n*" + line.substring(1); + } else { + return " " + line; + } + }); + return lines.join("\n") + "\n\n"; + }) + // Also trim trailing whitespace + .replace(/([^ \t])[ \t]+\n/g, "$1\n") + ); } // Start processing from the root directory