ci(node): check docs in CI (#2084)

* Make `npm run docs` fail if there are any warnings. This will catch
items missing from the API reference.
* Add a check in our CI to make sure `npm run docs` runs without warnings
and doesn't generate any new files (indicating it might be out-of-date).
* Hide constructors that aren't user facing.
* Remove unused enum `WriteMode`.

Closes #2068
This commit is contained in:
Will Jones
2025-01-30 16:06:06 -08:00
committed by GitHub
parent 25c17ebf4e
commit e05c0cd87e
59 changed files with 1287 additions and 597 deletions

View File

@@ -32,7 +32,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
console.log(results);
```
The [quickstart](../basic.md) contains a more complete example.
The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.
## Development

View File

@@ -257,6 +257,7 @@ export class MakeArrowTableOptions {
* - Record<String, any> => Struct
* - Array<any> => List
* @example
* ```ts
* import { fromTableToBuffer, makeArrowTable } from "../arrow";
* import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
*
@@ -278,43 +279,41 @@ export class MakeArrowTableOptions {
* names and data types.
*
* ```ts
*
* const schema = new Schema([
new Field("a", new Float64()),
new Field("b", new Float64()),
new Field(
"vector",
new FixedSizeList(3, new Field("item", new Float32()))
),
]);
const table = makeArrowTable([
{ a: 1, b: 2, vector: [1, 2, 3] },
{ a: 4, b: 5, vector: [4, 5, 6] },
{ a: 7, b: 8, vector: [7, 8, 9] },
]);
assert.deepEqual(table.schema, schema);
* new Field("a", new Float64()),
* new Field("b", new Float64()),
* new Field(
* "vector",
* new FixedSizeList(3, new Field("item", new Float32()))
* ),
* ]);
* const table = makeArrowTable([
* { a: 1, b: 2, vector: [1, 2, 3] },
* { a: 4, b: 5, vector: [4, 5, 6] },
* { a: 7, b: 8, vector: [7, 8, 9] },
* ]);
* assert.deepEqual(table.schema, schema);
* ```
*
* You can specify the vector column types and names using the options as well
*
* ```typescript
*
* ```ts
* const schema = new Schema([
new Field('a', new Float64()),
new Field('b', new Float64()),
new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
]);
* new Field('a', new Float64()),
* new Field('b', new Float64()),
* new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
* new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
* ]);
* const table = makeArrowTable([
{ a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
{ a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
{ a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
], {
vectorColumns: {
vec1: { type: new Float16() },
vec2: { type: new Float16() }
}
}
* { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
* { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
* { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
* ], {
* vectorColumns: {
* vec1: { type: new Float16() },
* vec2: { type: new Float16() }
* }
* });
* assert.deepEqual(table.schema, schema);
* ```
*/

View File

@@ -1,10 +1,23 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { Data, Schema, SchemaLike, TableLike } from "./arrow";
import { fromTableToBuffer, makeEmptyTable } from "./arrow";
import {
Data,
Schema,
SchemaLike,
TableLike,
fromTableToStreamBuffer,
isArrowTable,
makeArrowTable,
} from "./arrow";
import {
Table as ArrowTable,
fromTableToBuffer,
makeEmptyTable,
} from "./arrow";
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
import { Connection as LanceDbConnection } from "./native";
import { sanitizeTable } from "./sanitize";
import { LocalTable, Table } from "./table";
export interface CreateTableOptions {
@@ -116,6 +129,7 @@ export interface TableNamesOptions {
*
* Any created tables are independent and will continue to work even if
* the underlying connection has been closed.
* @hideconstructor
*/
export abstract class Connection {
[Symbol.for("nodejs.util.inspect.custom")](): string {
@@ -203,9 +217,11 @@ export abstract class Connection {
abstract dropTable(name: string): Promise<void>;
}
/** @hideconstructor */
export class LocalConnection extends Connection {
readonly inner: LanceDbConnection;
/** @hidden */
constructor(inner: LanceDbConnection) {
super();
this.inner = inner;
@@ -255,7 +271,7 @@ export class LocalConnection extends Connection {
if (data === undefined) {
throw new Error("data is required");
}
const { buf, mode } = await Table.parseTableData(data, options);
const { buf, mode } = await parseTableData(data, options);
let dataStorageVersion = "stable";
if (options?.dataStorageVersion !== undefined) {
dataStorageVersion = options.dataStorageVersion;
@@ -357,3 +373,38 @@ function camelToSnakeCase(camel: string): string {
}
return result;
}
/**
 * Serialize user-supplied table data into an Arrow buffer and resolve the
 * write mode to use when creating the table.
 *
 * @param data - Either an array of plain records or an Arrow-table-like
 *   object. Values that pass `isArrowTable` go through `sanitizeTable`;
 *   anything else is converted with `makeArrowTable`.
 * @param options - Creation options. `mode` defaults to `"create"`; when the
 *   mode is `"create"` and `existOk` is true, the mode becomes `"exist_ok"`.
 *   `embeddingFunction` and `schema` are forwarded to the serializer.
 * @param streaming - When true the table is encoded with
 *   `fromTableToStreamBuffer`, otherwise with `fromTableToBuffer`.
 * @returns The encoded buffer together with the resolved mode string.
 */
async function parseTableData(
  data: Record<string, unknown>[] | TableLike,
  options?: Partial<CreateTableOptions>,
  streaming = false,
) {
  // Resolve the effective mode: "create" + existOk is reported as "exist_ok".
  const requestedMode: string = options?.mode ?? "create";
  const allowExisting = options?.existOk ?? false;
  const mode: string =
    requestedMode === "create" && allowExisting ? "exist_ok" : requestedMode;

  // Normalize the input into an Arrow table.
  const table: ArrowTable = isArrowTable(data)
    ? sanitizeTable(data)
    : makeArrowTable(data as Record<string, unknown>[], options);

  if (!streaming) {
    const buf = await fromTableToBuffer(
      table,
      options?.embeddingFunction,
      options?.schema,
    );
    return { buf, mode };
  }

  const buf = await fromTableToStreamBuffer(
    table,
    options?.embeddingFunction,
    options?.schema,
  );
  return { buf, mode };
}

View File

@@ -78,7 +78,7 @@ export abstract class EmbeddingFunction<
*
* @param optionsOrDatatype - The options for the field or the datatype
*
* @see {@link lancedb.LanceSchema}
* @see {@link LanceSchema}
*/
sourceField(
optionsOrDatatype: Partial<FieldOptions> | DataType,
@@ -100,9 +100,9 @@ export abstract class EmbeddingFunction<
/**
* vectorField is used in combination with `LanceSchema` to provide a declarative data model
*
* @param options - The options for the field
* @param optionsOrDatatype - The options for the field
*
* @see {@link lancedb.LanceSchema}
* @see {@link LanceSchema}
*/
vectorField(
optionsOrDatatype?: Partial<FieldOptions> | DataType,

View File

@@ -6,7 +6,13 @@ import { sanitizeType } from "../sanitize";
import { EmbeddingFunction } from "./embedding_function";
import { EmbeddingFunctionConfig, getRegistry } from "./registry";
export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";
export {
FieldOptions,
EmbeddingFunction,
TextEmbeddingFunction,
FunctionOptions,
EmbeddingFunctionConstructor,
} from "./embedding_function";
export * from "./registry";

View File

@@ -7,11 +7,11 @@ import {
} from "./embedding_function";
import "reflect-metadata";
type CreateReturnType<T> = T extends { init: () => Promise<void> }
export type CreateReturnType<T> = T extends { init: () => Promise<void> }
? Promise<T>
: T;
interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
export interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
create(options?: T["TOptions"]): CreateReturnType<T>;
}
@@ -33,8 +33,6 @@ export class EmbeddingFunctionRegistry {
/**
* Register an embedding function
* @param name The name of the function
* @param func The function to register
* @throws Error if the function is already registered
*/
register<

View File

@@ -13,8 +13,6 @@ import {
} from "./native.js";
export {
WriteOptions,
WriteMode,
AddColumnsSql,
ColumnAlteration,
ConnectionOptions,
@@ -23,6 +21,9 @@ export {
ClientConfig,
TimeoutConfig,
RetryConfig,
OptimizeStats,
CompactionStats,
RemovalStats,
} from "./native.js";
export {
@@ -36,6 +37,7 @@ export {
Connection,
CreateTableOptions,
TableNamesOptions,
OpenTableOptions,
} from "./connection";
export {
@@ -43,15 +45,41 @@ export {
Query,
QueryBase,
VectorQuery,
QueryExecutionOptions,
FullTextSearchOptions,
RecordBatchIterator,
} from "./query";
export { Index, IndexOptions, IvfPqOptions } from "./indices";
export {
Index,
IndexOptions,
IvfPqOptions,
HnswPqOptions,
HnswSqOptions,
FtsOptions,
} from "./indices";
export { Table, AddDataOptions, UpdateOptions, OptimizeOptions } from "./table";
export {
Table,
AddDataOptions,
UpdateOptions,
OptimizeOptions,
Version,
} from "./table";
export { MergeInsertBuilder } from "./merge";
export * as embedding from "./embedding";
export * as rerankers from "./rerankers";
export {
SchemaLike,
TableLike,
FieldLike,
RecordBatchLike,
DataLike,
IntoVector,
} from "./arrow";
export { IntoSql } from "./util";
/**
* Connect to a LanceDB instance at the given URI.
@@ -64,6 +92,7 @@ export * as rerankers from "./rerankers";
* @param {string} uri - The uri of the database. If the database uri starts
* with `db://` then it connects to a remote database.
* @see {@link ConnectionOptions} for more details on the URI format.
* @param options - The options to use when connecting to the database
* @example
* ```ts
* const conn = await connect("/path/to/database");
@@ -78,7 +107,7 @@ export * as rerankers from "./rerankers";
*/
export async function connect(
uri: string,
opts?: Partial<ConnectionOptions>,
options?: Partial<ConnectionOptions>,
): Promise<Connection>;
/**
* Connect to a LanceDB instance at the given URI.
@@ -99,17 +128,17 @@ export async function connect(
* ```
*/
export async function connect(
opts: Partial<ConnectionOptions> & { uri: string },
options: Partial<ConnectionOptions> & { uri: string },
): Promise<Connection>;
export async function connect(
uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
opts: Partial<ConnectionOptions> = {},
options: Partial<ConnectionOptions> = {},
): Promise<Connection> {
let uri: string | undefined;
if (typeof uriOrOptions !== "string") {
const { uri: uri_, ...options } = uriOrOptions;
const { uri: uri_, ...opts } = uriOrOptions;
uri = uri_;
opts = options;
options = opts;
} else {
uri = uriOrOptions;
}
@@ -118,10 +147,10 @@ export async function connect(
throw new Error("uri is required");
}
opts = (opts as ConnectionOptions) ?? {};
(<ConnectionOptions>opts).storageOptions = cleanseStorageOptions(
(<ConnectionOptions>opts).storageOptions,
options = (options as ConnectionOptions) ?? {};
(<ConnectionOptions>options).storageOptions = cleanseStorageOptions(
(<ConnectionOptions>options).storageOptions,
);
const nativeConn = await LanceDbConnection.new(uri, opts);
const nativeConn = await LanceDbConnection.new(uri, options);
return new LocalConnection(nativeConn);
}

View File

@@ -481,8 +481,6 @@ export class Index {
* The results of a full text search are ordered by relevance measured by BM25.
*
* You can combine filters with full text search.
*
* For now, the full text search index only supports English, and doesn't support phrase search.
*/
static fts(options?: Partial<FtsOptions>) {
return new Index(

View File

@@ -93,10 +93,19 @@ export interface FullTextSearchOptions {
columns?: string | string[];
}
/** Common methods supported by all query types */
/** Common methods supported by all query types
*
* @see {@link Query}
* @see {@link VectorQuery}
*
* @hideconstructor
*/
export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
implements AsyncIterable<RecordBatch>
{
/**
* @hidden
*/
protected constructor(
protected inner: NativeQueryType | Promise<NativeQueryType>,
) {
@@ -104,6 +113,9 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
}
// call a function on the inner (either a promise or the actual object)
/**
* @hidden
*/
protected doCall(fn: (inner: NativeQueryType) => void) {
if (this.inner instanceof Promise) {
this.inner = this.inner.then((inner) => {
@@ -132,7 +144,7 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
}
/**
* A filter statement to be applied to this query.
* @alias where
* @see where
* @deprecated Use `where` instead
*/
filter(predicate: string): this {
@@ -235,7 +247,7 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
* Skip searching un-indexed data. This can make search faster, but will miss
* any data that is not yet indexed.
*
* Use {@link lancedb.Table#optimize} to index all un-indexed data.
* Use {@link Table#optimize} to index all un-indexed data.
*/
fastSearch(): this {
this.doCall((inner: NativeQueryType) => inner.fastSearch());
@@ -254,6 +266,9 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
return this;
}
/**
* @hidden
*/
protected nativeExecute(
options?: Partial<QueryExecutionOptions>,
): Promise<NativeBatchIterator> {
@@ -281,6 +296,9 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
return new RecordBatchIterator(this.nativeExecute(options));
}
/**
* @hidden
*/
// biome-ignore lint/suspicious/noExplicitAny: skip
[Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>> {
const promise = this.nativeExecute();
@@ -343,8 +361,15 @@ export interface ExecutableQuery {}
* A builder used to construct a vector search
*
* This builder can be reused to execute the query many times.
*
* @see {@link Query#nearestTo}
*
* @hideconstructor
*/
export class VectorQuery extends QueryBase<NativeVectorQuery> {
/**
* @hidden
*/
constructor(inner: NativeVectorQuery | Promise<NativeVectorQuery>) {
super(inner);
}
@@ -570,8 +595,16 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
}
}
/** A builder for LanceDB queries. */
/** A builder for LanceDB queries.
*
* @see {@link Table#query}, {@link Table#search}
*
* @hideconstructor
*/
export class Query extends QueryBase<NativeQuery> {
/**
* @hidden
*/
constructor(tbl: NativeTable) {
super(tbl.query());
}

View File

@@ -8,11 +8,12 @@ import { RrfReranker as NativeRRFReranker } from "../native";
/**
* Reranks the results using the Reciprocal Rank Fusion (RRF) algorithm.
*
* Internally this uses the Rust implementation
* @hideconstructor
*/
export class RRFReranker {
private inner: NativeRRFReranker;
/** @ignore */
constructor(inner: NativeRRFReranker) {
this.inner = inner;
}

View File

@@ -6,15 +6,9 @@ import {
Data,
IntoVector,
Schema,
TableLike,
fromDataToBuffer,
fromTableToBuffer,
fromTableToStreamBuffer,
isArrowTable,
makeArrowTable,
tableFromIPC,
} from "./arrow";
import { CreateTableOptions } from "./connection";
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
import { IndexOptions } from "./indices";
@@ -28,7 +22,6 @@ import {
Table as _NativeTable,
} from "./native";
import { Query, VectorQuery } from "./query";
import { sanitizeTable } from "./sanitize";
import { IntoSql, toSQL } from "./util";
export { IndexConfig } from "./native";
@@ -91,8 +84,14 @@ export interface Version {
* can call the `close` method. Once the Table is closed, it cannot be used for any
* further operations.
*
* Tables are created using the methods {@link Connection#createTable}
* and {@link Connection#createEmptyTable}. Existing tables are opened
* using {@link Connection#openTable}.
*
* Closing a table is optional. If not closed, it will be closed when it is garbage
* collected.
*
* @hideconstructor
*/
export abstract class Table {
[Symbol.for("nodejs.util.inspect.custom")](): string {
@@ -190,8 +189,9 @@ export abstract class Table {
* Indices on scalar columns will speed up filtering (in both
* vector and non-vector searches)
*
* @note We currently don't support custom named indexes,
* The index name will always be `${column}_idx`
* We currently don't support custom named indexes.
* The index name will always be `${column}_idx`.
*
* @example
* // If the column has a vector (fixed size list) data type then
* // an IvfPq vector index will be created.
@@ -221,7 +221,7 @@ export abstract class Table {
*
* @param name The name of the index.
*
* @note This does not delete the index from disk, it just removes it from the table.
* This does not delete the index from disk, it just removes it from the table.
* To delete the index, run {@link Table#optimize} after dropping the index.
*
* Use {@link Table.listIndices} to find the names of the indices.
@@ -432,41 +432,6 @@ export abstract class Table {
* Use {@link Table.listIndices} to find the names of the indices.
*/
abstract indexStats(name: string): Promise<IndexStatistics | undefined>;
/**
 * Turn table input into a serialized Arrow buffer plus the write mode.
 *
 * Input that passes `isArrowTable` is run through `sanitizeTable`; any other
 * input is built into a table with `makeArrowTable`. The mode comes from
 * `options.mode` (default `"create"`) and is switched to `"exist_ok"` when
 * it is `"create"` and `options.existOk` is true.
 *
 * @param data - Array of records or an Arrow-table-like value.
 * @param options - Creation options; `embeddingFunction` and `schema` are
 *   passed through to the buffer encoder.
 * @param streaming - Selects `fromTableToStreamBuffer` when true and
 *   `fromTableToBuffer` when false.
 * @returns An object holding the encoded `buf` and the resolved `mode`.
 */
static async parseTableData(
  data: Record<string, unknown>[] | TableLike,
  options?: Partial<CreateTableOptions>,
  streaming = false,
) {
  const initialMode: string = options?.mode ?? "create";
  const existOk = options?.existOk ?? false;
  // "create" with existOk set is reported as "exist_ok".
  const mode: string =
    initialMode === "create" && existOk ? "exist_ok" : initialMode;

  const table: ArrowTable = isArrowTable(data)
    ? sanitizeTable(data)
    : makeArrowTable(data as Record<string, unknown>[], options);

  if (streaming) {
    const buf = await fromTableToStreamBuffer(
      table,
      options?.embeddingFunction,
      options?.schema,
    );
    return { buf, mode };
  }

  const buf = await fromTableToBuffer(
    table,
    options?.embeddingFunction,
    options?.schema,
  );
  return { buf, mode };
}
}
export class LocalTable extends Table {

View File

@@ -77,7 +77,7 @@
"build": "npm run build:debug && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts && shx cp lancedb/*.node dist/",
"build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
"lint-ci": "biome ci .",
"docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts",
"docs": "typedoc --plugin typedoc-plugin-markdown --treatWarningsAsErrors --out ../docs/src/js lancedb/index.ts",
"postdocs": "node typedoc_post_process.js",
"lint": "biome check . && biome format .",
"lint-fix": "biome check --write . && biome format --write .",

View File

@@ -49,21 +49,6 @@ pub struct ConnectionOptions {
pub host_override: Option<String>,
}
/// Write mode for writing a table.
#[napi(string_enum)]
pub enum WriteMode {
Create,
Append,
Overwrite,
}
/// Write options when creating a Table.
#[napi(object)]
pub struct WriteOptions {
/// Write mode for writing to a table.
pub mode: Option<WriteMode>,
}
#[napi(object)]
pub struct OpenTableOptions {
pub storage_options: Option<HashMap<String, String>>,

View File

@@ -1,11 +1,9 @@
{
"intentionallyNotExported": [
"lancedb/native.d.ts:Connection",
"lancedb/native.d.ts:Index",
"lancedb/native.d.ts:Query",
"lancedb/native.d.ts:VectorQuery",
"lancedb/native.d.ts:RecordBatchIterator",
"lancedb/native.d.ts:Table"
"lancedb/native.d.ts:NativeMergeInsertBuilder"
],
"useHTMLEncodedBrackets": true,
"useCodeBlocks": true,

View File

@@ -40,23 +40,28 @@ function processDirectory(directoryPath) {
function processContents(contents) {
// This changes the parameters section to put the parameter description on
// the same line as the bullet with the parameter name and type.
return contents.replace(/(## Parameters[\s\S]*?)(?=##|$)/g, (match) => {
let lines = match
.split("\n")
.map((line) => line.trim())
return (
contents
.replace(/(## Parameters[\s\S]*?)(?=##|$)/g, (match) => {
let lines = match
.split("\n")
.map((line) => line.trim())
.filter((line) => line !== "")
.map((line) => {
if (line.startsWith("##")) {
return line;
} else if (line.startsWith("•")) {
return "\n*" + line.substring(1);
} else {
return " " + line;
}
});
return lines.join("\n") + "\n\n";
});
.filter((line) => line !== "")
.map((line) => {
if (line.startsWith("##")) {
return line;
} else if (line.startsWith("•")) {
return "\n*" + line.substring(1);
} else {
return " " + line;
}
});
return lines.join("\n") + "\n\n";
})
// Also trim trailing whitespace
.replace(/([^ \t])[ \t]+\n/g, "$1\n")
);
}
// Start processing from the root directory