mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-06 03:42:57 +00:00
The eslint rules specify some formatting requirements that are rather strict and conflict with vscode's default formatter. I was unable to get auto-formatting to setup correctly. Also, eslint has quite recently [given up on formatting](https://eslint.org/blog/2023/10/deprecating-formatting-rules/) and recommends using a 3rd party formatter. This PR adds prettier as the formatter. It restores the eslint rules to their defaults. This does mean we now have the "no explicit any" check back on. I know that rule is pedantic but it did help me catch a few corner cases in type testing that weren't covered in the current code. Leaving in draft as this is dependent on other PRs.
236 lines
7.0 KiB
TypeScript
236 lines
7.0 KiB
TypeScript
// Copyright 2024 Lance Developers.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
import { Schema, tableFromIPC } from "apache-arrow";
|
|
import {
|
|
AddColumnsSql,
|
|
ColumnAlteration,
|
|
Table as _NativeTable,
|
|
} from "./native";
|
|
import { Query } from "./query";
|
|
import { IndexBuilder } from "./indexer";
|
|
import { Data, fromDataToBuffer } from "./arrow";
|
|
|
|
/**
|
|
* Options for adding data to a table.
|
|
*/
|
|
export interface AddDataOptions {
|
|
/** If "append" (the default) then the new data will be added to the table
|
|
*
|
|
* If "overwrite" then the new data will replace the existing data in the table.
|
|
*/
|
|
mode: "append" | "overwrite";
|
|
}
|
|
|
|
/**
|
|
* A Table is a collection of Records in a LanceDB Database.
|
|
*
|
|
* A Table object is expected to be long lived and reused for multiple operations.
|
|
* Table objects will cache a certain amount of index data in memory. This cache
|
|
* will be freed when the Table is garbage collected. To eagerly free the cache you
|
|
* can call the `close` method. Once the Table is closed, it cannot be used for any
|
|
* further operations.
|
|
*
|
|
* Closing a table is optional. It not closed, it will be closed when it is garbage
|
|
* collected.
|
|
*/
|
|
export class Table {
|
|
private readonly inner: _NativeTable;
|
|
|
|
/** Construct a Table. Internal use only. */
|
|
constructor(inner: _NativeTable) {
|
|
this.inner = inner;
|
|
}
|
|
|
|
/** Return true if the table has not been closed */
|
|
isOpen(): boolean {
|
|
return this.inner.isOpen();
|
|
}
|
|
|
|
/** Close the table, releasing any underlying resources.
|
|
*
|
|
* It is safe to call this method multiple times.
|
|
*
|
|
* Any attempt to use the table after it is closed will result in an error.
|
|
*/
|
|
close(): void {
|
|
this.inner.close();
|
|
}
|
|
|
|
/** Return a brief description of the table */
|
|
display(): string {
|
|
return this.inner.display();
|
|
}
|
|
|
|
/** Get the schema of the table. */
|
|
async schema(): Promise<Schema> {
|
|
const schemaBuf = await this.inner.schema();
|
|
const tbl = tableFromIPC(schemaBuf);
|
|
return tbl.schema;
|
|
}
|
|
|
|
/**
|
|
* Insert records into this Table.
|
|
*
|
|
* @param {Data} data Records to be inserted into the Table
|
|
* @return The number of rows added to the table
|
|
*/
|
|
async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
|
|
const mode = options?.mode ?? "append";
|
|
|
|
const buffer = await fromDataToBuffer(data);
|
|
await this.inner.add(buffer, mode);
|
|
}
|
|
|
|
/** Count the total number of rows in the dataset. */
|
|
async countRows(filter?: string): Promise<number> {
|
|
return await this.inner.countRows(filter);
|
|
}
|
|
|
|
/** Delete the rows that satisfy the predicate. */
|
|
async delete(predicate: string): Promise<void> {
|
|
await this.inner.delete(predicate);
|
|
}
|
|
|
|
/** Create an index over the columns.
|
|
*
|
|
* @param {string} column The column to create the index on. If not specified,
|
|
* it will create an index on vector field.
|
|
*
|
|
* @example
|
|
*
|
|
* By default, it creates vector idnex on one vector column.
|
|
*
|
|
* ```typescript
|
|
* const table = await conn.openTable("my_table");
|
|
* await table.createIndex().build();
|
|
* ```
|
|
*
|
|
* You can specify `IVF_PQ` parameters via `ivf_pq({})` call.
|
|
* ```typescript
|
|
* const table = await conn.openTable("my_table");
|
|
* await table.createIndex("my_vec_col")
|
|
* .ivf_pq({ num_partitions: 128, num_sub_vectors: 16 })
|
|
* .build();
|
|
* ```
|
|
*
|
|
* Or create a Scalar index
|
|
*
|
|
* ```typescript
|
|
* await table.createIndex("my_float_col").build();
|
|
* ```
|
|
*/
|
|
createIndex(column?: string): IndexBuilder {
|
|
let builder = new IndexBuilder(this.inner);
|
|
if (column !== undefined) {
|
|
builder = builder.column(column);
|
|
}
|
|
return builder;
|
|
}
|
|
|
|
/**
|
|
* Create a generic {@link Query} Builder.
|
|
*
|
|
* When appropriate, various indices and statistics based pruning will be used to
|
|
* accelerate the query.
|
|
*
|
|
* @example
|
|
*
|
|
* ### Run a SQL-style query
|
|
* ```typescript
|
|
* for await (const batch of table.query()
|
|
* .filter("id > 1").select(["id"]).limit(20)) {
|
|
* console.log(batch);
|
|
* }
|
|
* ```
|
|
*
|
|
* ### Run Top-10 vector similarity search
|
|
* ```typescript
|
|
* for await (const batch of table.query()
|
|
* .nearestTo([1, 2, 3])
|
|
* .refineFactor(5).nprobe(10)
|
|
* .limit(10)) {
|
|
* console.log(batch);
|
|
* }
|
|
*```
|
|
*
|
|
* ### Scan the full dataset
|
|
* ```typescript
|
|
* for await (const batch of table.query()) {
|
|
* console.log(batch);
|
|
* }
|
|
*
|
|
* ### Return the full dataset as Arrow Table
|
|
* ```typescript
|
|
* let arrowTbl = await table.query().nearestTo([1.0, 2.0, 0.5, 6.7]).toArrow();
|
|
* ```
|
|
*
|
|
* @returns {@link Query}
|
|
*/
|
|
query(): Query {
|
|
return new Query(this.inner);
|
|
}
|
|
|
|
/** Search the table with a given query vector.
|
|
*
|
|
* This is a convenience method for preparing an ANN {@link Query}.
|
|
*/
|
|
search(vector: number[], column?: string): Query {
|
|
const q = this.query();
|
|
q.nearestTo(vector);
|
|
if (column !== undefined) {
|
|
q.column(column);
|
|
}
|
|
return q;
|
|
}
|
|
|
|
// TODO: Support BatchUDF
|
|
/**
|
|
* Add new columns with defined values.
|
|
*
|
|
* @param newColumnTransforms pairs of column names and the SQL expression to use
|
|
* to calculate the value of the new column. These
|
|
* expressions will be evaluated for each row in the
|
|
* table, and can reference existing columns in the table.
|
|
*/
|
|
async addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void> {
|
|
await this.inner.addColumns(newColumnTransforms);
|
|
}
|
|
|
|
/**
|
|
* Alter the name or nullability of columns.
|
|
*
|
|
* @param columnAlterations One or more alterations to apply to columns.
|
|
*/
|
|
async alterColumns(columnAlterations: ColumnAlteration[]): Promise<void> {
|
|
await this.inner.alterColumns(columnAlterations);
|
|
}
|
|
|
|
/**
|
|
* Drop one or more columns from the dataset
|
|
*
|
|
* This is a metadata-only operation and does not remove the data from the
|
|
* underlying storage. In order to remove the data, you must subsequently
|
|
* call ``compact_files`` to rewrite the data without the removed columns and
|
|
* then call ``cleanup_files`` to remove the old files.
|
|
*
|
|
* @param columnNames The names of the columns to drop. These can be nested
|
|
* column references (e.g. "a.b.c") or top-level column
|
|
* names (e.g. "a").
|
|
*/
|
|
async dropColumns(columnNames: string[]): Promise<void> {
|
|
await this.inner.dropColumns(columnNames);
|
|
}
|
|
}
|