mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-04 19:02:58 +00:00
Fixes the breaking CI for nodejs, related to the documentation of the
new Permutation API in typescript.
- Expanded the generated typings in `nodejs/lancedb/native.d.ts` to
include `SplitCalculatedOptions`, `splitNames` fields, and the
persist/options-based `splitCalculated` methods so the permutation
exports match the native API.
- The previous block comment had an inconsistency.
`splitCalculated` takes an options object (`SplitCalculatedOptions`) in
our bindings, not a bare string. The previous example showed
`builder.splitCalculated("user_id % 3");`, which doesn’t match the
actual signature and would fail TS typecheck. I updated the comment to
`builder.splitCalculated({ calculation: "user_id % 3" });` so the
example is now correct.
- Updated the `splitCalculated` example in
`nodejs/lancedb/permutation.ts` to use the options object.
- Ran `npm run docs` to ensure docs build correctly.
> [!NOTE]
> **Disclaimer**: I used GPT-5.1-Codex-Max to make these updates, but I
have read the code and run `npm run docs` to verify that they work and
are correct to the best of my knowledge.
203 lines
5.8 KiB
TypeScript
203 lines
5.8 KiB
TypeScript
// SPDX-License-Identifier: Apache-2.0
|
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
|
|
import { Connection, LocalConnection } from "./connection.js";
|
|
import {
|
|
PermutationBuilder as NativePermutationBuilder,
|
|
Table as NativeTable,
|
|
ShuffleOptions,
|
|
SplitCalculatedOptions,
|
|
SplitHashOptions,
|
|
SplitRandomOptions,
|
|
SplitSequentialOptions,
|
|
permutationBuilder as nativePermutationBuilder,
|
|
} from "./native.js";
|
|
import { LocalTable, Table } from "./table";
|
|
|
|
/**
 * A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
 *
 * This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
 * offering methods to configure data splits, shuffling, and filtering before executing
 * the permutation to create a new table.
 *
 * Every configuration method forwards to the wrapped native builder and returns a
 * new wrapper around the native builder it gets back; the wrapper it was called on
 * keeps its own `inner` reference unchanged.
 */
export class PermutationBuilder {
  // Assigned once in the constructor and never rebound afterwards.
  private readonly inner: NativePermutationBuilder;

  /**
   * @hidden
   */
  constructor(inner: NativePermutationBuilder) {
    this.inner = inner;
  }

  /**
   * Configure the permutation to be persisted.
   *
   * @param connection - The connection to persist the permutation to
   * @param tableName - The name of the table to create
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.persist(connection, "permutation_table");
   * ```
   */
  persist(connection: Connection, tableName: string): PermutationBuilder {
    // The native call needs the native connection handle, which is read from
    // `LocalConnection.inner`; the cast is not checked at runtime.
    const localConnection = connection as LocalConnection;
    const newInner = this.inner.persist(localConnection.inner, tableName);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure random splits for the permutation.
   *
   * @param options - Configuration for random splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * // Split by ratios
   * builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
   *
   * // Split by counts
   * builder.splitRandom({ counts: [1000, 500], seed: 42 });
   *
   * // Split with fixed size
   * builder.splitRandom({ fixed: 100, seed: 42 });
   * ```
   */
  splitRandom(options: SplitRandomOptions): PermutationBuilder {
    const newInner = this.inner.splitRandom(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure hash-based splits for the permutation.
   *
   * @param options - Configuration for hash-based splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.splitHash({
   *   columns: ["user_id"],
   *   splitWeights: [70, 30],
   *   discardWeight: 0
   * });
   * ```
   */
  splitHash(options: SplitHashOptions): PermutationBuilder {
    const newInner = this.inner.splitHash(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure sequential splits for the permutation.
   *
   * @param options - Configuration for sequential splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * // Split by ratios
   * builder.splitSequential({ ratios: [0.8, 0.2] });
   *
   * // Split by counts
   * builder.splitSequential({ counts: [800, 200] });
   *
   * // Split with fixed size
   * builder.splitSequential({ fixed: 1000 });
   * ```
   */
  splitSequential(options: SplitSequentialOptions): PermutationBuilder {
    const newInner = this.inner.splitSequential(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure calculated splits for the permutation.
   *
   * @param options - Configuration for calculated splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.splitCalculated({ calculation: "user_id % 3" });
   * ```
   */
  splitCalculated(options: SplitCalculatedOptions): PermutationBuilder {
    const newInner = this.inner.splitCalculated(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure shuffling for the permutation.
   *
   * @param options - Configuration for shuffling
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * // Basic shuffle
   * builder.shuffle({ seed: 42 });
   *
   * // Shuffle with clump size
   * builder.shuffle({ seed: 42, clumpSize: 10 });
   * ```
   */
  shuffle(options: ShuffleOptions): PermutationBuilder {
    const newInner = this.inner.shuffle(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure filtering for the permutation.
   *
   * @param filter - SQL filter expression
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.filter("age > 18 AND status = 'active'");
   * ```
   */
  filter(filter: string): PermutationBuilder {
    const newInner = this.inner.filter(filter);
    return new PermutationBuilder(newInner);
  }

  /**
   * Execute the permutation and create the destination table.
   *
   * @returns A Promise that resolves to the new Table instance
   * @example
   * ```ts
   * const permutationTable = await builder.execute();
   * console.log(`Created table: ${permutationTable.name}`);
   * ```
   */
  async execute(): Promise<Table> {
    // The native builder resolves to a native table handle; wrap it so callers
    // receive the TypeScript `Table` API.
    const nativeTable: NativeTable = await this.inner.execute();
    return new LocalTable(nativeTable);
  }
}
|
|
|
|
/**
 * Create a permutation builder for the given table.
 *
 * @param table - The source table to create a permutation from
 * @returns A PermutationBuilder instance
 * @example
 * ```ts
 * const builder = permutationBuilder(sourceTable)
 *   .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
 *   .shuffle({ seed: 123 });
 *
 * const trainingTable = await builder.execute();
 * ```
 */
export function permutationBuilder(table: Table): PermutationBuilder {
  // Extract the inner native table from the TypeScript wrapper
  const localTable = table as LocalTable;
  // Access inner through type assertion since it's private
  const nativeBuilder = nativePermutationBuilder(
    // biome-ignore lint/suspicious/noExplicitAny: need access to private variable
    (localTable as any).inner,
  );
  return new PermutationBuilder(nativeBuilder);
}
|