fix(nodejs): add better error handling when missing embedding functions (#1290)

Note: running the default lint command `npm run lint -- --fix` seems to have made a lot of unrelated formatting changes.
Author: Cory Grinstead
Date: 2024-05-14 08:43:39 -05:00
Committed by: GitHub
Parent: df9c41f342
Commit: bc582bb702
5 changed files with 1242 additions and 1017 deletions
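In short: `makeArrowTable` gains an optional `embeddings` option and, when a schema is provided, now checks that any `FixedSizeList` (vector) columns missing from the input records are backed by an embedding function; if not, it throws a descriptive error instead of failing later with an opaque Arrow conversion error. Below is a minimal sketch of the new error path. It assumes the package is imported as `vectordb`, and the schema, records, and 128-dimensional vector column are illustrative.

```ts
import { Schema, Field, Float32, FixedSizeList, Utf8 } from "apache-arrow";
import { makeArrowTable } from "vectordb";

// A schema with a vector column that is normally produced by an embedding function
const schema = new Schema([
  new Field("text", new Utf8(), true),
  new Field(
    "vector",
    new FixedSizeList(128, new Field("item", new Float32(), true)),
    true
  )
]);

// The records only carry the source column; "vector" is absent
const data = [{ text: "hello" }, { text: "world" }];

// With this change the call below throws a clear error along the lines of:
//   Table has embeddings: "vector", but no embedding function was provided
makeArrowTable(data, { schema });
```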


@@ -27,23 +27,23 @@ import {
RecordBatch,
makeData,
Struct,
Float,
type Float,
DataType,
Binary,
Float32
} from 'apache-arrow'
import { type EmbeddingFunction } from './index'
import { sanitizeSchema } from './sanitize'
} from "apache-arrow";
import { type EmbeddingFunction } from "./index";
import { sanitizeSchema } from "./sanitize";
/*
* Options to control how a column should be converted to a vector array
*/
export class VectorColumnOptions {
/** Vector column type. */
type: Float = new Float32()
type: Float = new Float32();
constructor (values?: Partial<VectorColumnOptions>) {
Object.assign(this, values)
constructor(values?: Partial<VectorColumnOptions>) {
Object.assign(this, values);
}
}
@@ -60,7 +60,7 @@ export class MakeArrowTableOptions {
* The schema must be specified if there are no records (e.g. to make
* an empty table)
*/
schema?: Schema
schema?: Schema;
/*
* Mapping from vector column name to expected type
@@ -80,7 +80,9 @@ export class MakeArrowTableOptions {
*/
vectorColumns: Record<string, VectorColumnOptions> = {
vector: new VectorColumnOptions()
}
};
embeddings?: EmbeddingFunction<any>;
/**
* If true then string columns will be encoded with dictionary encoding
@@ -91,10 +93,10 @@ export class MakeArrowTableOptions {
*
* If `schema` is provided then this property is ignored.
*/
dictionaryEncodeStrings: boolean = false
dictionaryEncodeStrings: boolean = false;
constructor (values?: Partial<MakeArrowTableOptions>) {
Object.assign(this, values)
constructor(values?: Partial<MakeArrowTableOptions>) {
Object.assign(this, values);
}
}
@@ -193,59 +195,68 @@ export class MakeArrowTableOptions {
* assert.deepEqual(table.schema, schema)
* ```
*/
export function makeArrowTable (
export function makeArrowTable(
data: Array<Record<string, any>>,
options?: Partial<MakeArrowTableOptions>
): ArrowTable {
if (data.length === 0 && (options?.schema === undefined || options?.schema === null)) {
throw new Error('At least one record or a schema needs to be provided')
if (
data.length === 0 &&
(options?.schema === undefined || options?.schema === null)
) {
throw new Error("At least one record or a schema needs to be provided");
}
const opt = new MakeArrowTableOptions(options !== undefined ? options : {})
const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
if (opt.schema !== undefined && opt.schema !== null) {
opt.schema = sanitizeSchema(opt.schema)
opt.schema = sanitizeSchema(opt.schema);
opt.schema = validateSchemaEmbeddings(opt.schema, data, opt.embeddings);
}
const columns: Record<string, Vector> = {}
const columns: Record<string, Vector> = {};
// TODO: sample dataset to find missing columns
// Prefer the field ordering of the schema, if present
const columnNames = ((opt.schema) != null) ? (opt.schema.names as string[]) : Object.keys(data[0])
const columnNames =
opt.schema != null ? (opt.schema.names as string[]) : Object.keys(data[0]);
for (const colName of columnNames) {
if (data.length !== 0 && !Object.prototype.hasOwnProperty.call(data[0], colName)) {
if (
data.length !== 0 &&
!Object.prototype.hasOwnProperty.call(data[0], colName)
) {
// The field is present in the schema, but not in the data, skip it
continue
continue;
}
// Extract a single column from the records (transpose from row-major to col-major)
let values = data.map((datum) => datum[colName])
let values = data.map((datum) => datum[colName]);
// By default (type === undefined) arrow will infer the type from the JS type
let type
let type;
if (opt.schema !== undefined) {
// If there is a schema provided, then use that for the type instead
type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type
type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type;
if (DataType.isInt(type) && type.bitWidth === 64) {
// wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
values = values.map((v) => {
if (v === null) {
return v
return v;
}
return BigInt(v)
})
return BigInt(v);
});
}
} else {
// Otherwise, check to see if this column is one of the vector columns
// defined by opt.vectorColumns and, if so, use the fixed size list type
const vectorColumnOptions = opt.vectorColumns[colName]
const vectorColumnOptions = opt.vectorColumns[colName];
if (vectorColumnOptions !== undefined) {
type = newVectorType(values[0].length, vectorColumnOptions.type)
type = newVectorType(values[0].length, vectorColumnOptions.type);
}
}
try {
// Convert an Array of JS values to an arrow vector
columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings)
columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
} catch (error: unknown) {
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
throw Error(`Could not convert column "${colName}" to Arrow: ${error}`)
throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
}
}
@@ -260,97 +271,116 @@ export function makeArrowTable (
// To work around this we first create a table with the wrong schema and
// then patch the schema of the batches so we can use
// `new ArrowTable(schema, batches)` which does not do any schema inference
const firstTable = new ArrowTable(columns)
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const batchesFixed = firstTable.batches.map(batch => new RecordBatch(opt.schema!, batch.data))
return new ArrowTable(opt.schema, batchesFixed)
const firstTable = new ArrowTable(columns);
const batchesFixed = firstTable.batches.map(
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
(batch) => new RecordBatch(opt.schema!, batch.data)
);
return new ArrowTable(opt.schema, batchesFixed);
} else {
return new ArrowTable(columns)
return new ArrowTable(columns);
}
}
/**
* Create an empty Arrow table with the provided schema
*/
export function makeEmptyTable (schema: Schema): ArrowTable {
return makeArrowTable([], { schema })
export function makeEmptyTable(schema: Schema): ArrowTable {
return makeArrowTable([], { schema });
}
// Helper function to convert Array<Array<any>> to a variable sized list array
function makeListVector (lists: any[][]): Vector<any> {
function makeListVector(lists: any[][]): Vector<any> {
if (lists.length === 0 || lists[0].length === 0) {
throw Error('Cannot infer list vector from empty array or empty list')
throw Error("Cannot infer list vector from empty array or empty list");
}
const sampleList = lists[0]
let inferredType
const sampleList = lists[0];
let inferredType;
try {
const sampleVector = makeVector(sampleList)
inferredType = sampleVector.type
const sampleVector = makeVector(sampleList);
inferredType = sampleVector.type;
} catch (error: unknown) {
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
throw Error(`Cannot infer list vector. Cannot infer inner type: ${error}`)
throw Error(`Cannot infer list vector. Cannot infer inner type: ${error}`);
}
const listBuilder = makeBuilder({
type: new List(new Field('item', inferredType, true))
})
type: new List(new Field("item", inferredType, true))
});
for (const list of lists) {
listBuilder.append(list)
listBuilder.append(list);
}
return listBuilder.finish().toVector()
return listBuilder.finish().toVector();
}
// Helper function to convert an Array of JS values to an Arrow Vector
function makeVector (values: any[], type?: DataType, stringAsDictionary?: boolean): Vector<any> {
function makeVector(
values: any[],
type?: DataType,
stringAsDictionary?: boolean
): Vector<any> {
if (type !== undefined) {
// No need for inference, let Arrow create it
return vectorFromArray(values, type)
return vectorFromArray(values, type);
}
if (values.length === 0) {
throw Error('makeVector requires at least one value or the type must be specfied')
throw Error(
"makeVector requires at least one value or the type must be specfied"
);
}
const sampleValue = values.find(val => val !== null && val !== undefined)
const sampleValue = values.find((val) => val !== null && val !== undefined);
if (sampleValue === undefined) {
throw Error('makeVector cannot infer the type if all values are null or undefined')
throw Error(
"makeVector cannot infer the type if all values are null or undefined"
);
}
if (Array.isArray(sampleValue)) {
// Default Arrow inference doesn't handle list types
return makeListVector(values)
return makeListVector(values);
} else if (Buffer.isBuffer(sampleValue)) {
// Default Arrow inference doesn't handle Buffer
return vectorFromArray(values, new Binary())
} else if (!(stringAsDictionary ?? false) && (typeof sampleValue === 'string' || sampleValue instanceof String)) {
return vectorFromArray(values, new Binary());
} else if (
!(stringAsDictionary ?? false) &&
(typeof sampleValue === "string" || sampleValue instanceof String)
) {
// If the type is string then don't use Arrow's default inference unless dictionaries are requested
// because it will always use dictionary encoding for strings
return vectorFromArray(values, new Utf8())
return vectorFromArray(values, new Utf8());
} else {
// Convert a JS array of values to an arrow vector
return vectorFromArray(values)
return vectorFromArray(values);
}
}
async function applyEmbeddings<T> (table: ArrowTable, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<ArrowTable> {
async function applyEmbeddings<T>(
table: ArrowTable,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
): Promise<ArrowTable> {
if (embeddings == null) {
return table
return table;
}
if (schema !== undefined && schema !== null) {
schema = sanitizeSchema(schema)
schema = sanitizeSchema(schema);
}
// Convert from ArrowTable to Record<String, Vector>
const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
const name = table.schema.fields[idx].name
const name = table.schema.fields[idx].name;
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const vec = table.getChildAt(idx)!
return [name, vec]
})
const newColumns = Object.fromEntries(colEntries)
const vec = table.getChildAt(idx)!;
return [name, vec];
});
const newColumns = Object.fromEntries(colEntries);
const sourceColumn = newColumns[embeddings.sourceColumn]
const destColumn = embeddings.destColumn ?? 'vector'
const innerDestType = embeddings.embeddingDataType ?? new Float32()
const sourceColumn = newColumns[embeddings.sourceColumn];
const destColumn = embeddings.destColumn ?? "vector";
const innerDestType = embeddings.embeddingDataType ?? new Float32();
if (sourceColumn === undefined) {
throw new Error(`Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`)
throw new Error(
`Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`
);
}
if (table.numRows === 0) {
@@ -358,45 +388,60 @@ async function applyEmbeddings<T> (table: ArrowTable, embeddings?: EmbeddingFunc
// We have an empty table and it already has the embedding column so no work needs to be done
// Note: we don't return an error like we did below because this is a common occurrence. For example,
// if we call convertToTable with 0 records and a schema that includes the embedding
return table
return table;
}
if (embeddings.embeddingDimension !== undefined) {
const destType = newVectorType(embeddings.embeddingDimension, innerDestType)
newColumns[destColumn] = makeVector([], destType)
const destType = newVectorType(
embeddings.embeddingDimension,
innerDestType
);
newColumns[destColumn] = makeVector([], destType);
} else if (schema != null) {
const destField = schema.fields.find(f => f.name === destColumn)
const destField = schema.fields.find((f) => f.name === destColumn);
if (destField != null) {
newColumns[destColumn] = makeVector([], destField.type)
newColumns[destColumn] = makeVector([], destField.type);
} else {
throw new Error(`Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`)
throw new Error(
`Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`
);
}
} else {
throw new Error('Attempt to apply embeddings to an empty table when the embeddings function does not specify `embeddingDimension`')
throw new Error(
"Attempt to apply embeddings to an empty table when the embeddings function does not specify `embeddingDimension`"
);
}
} else {
if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
throw new Error(`Attempt to apply embeddings to table failed because column ${destColumn} already existed`)
throw new Error(
`Attempt to apply embeddings to table failed because column ${destColumn} already existed`
);
}
if (table.batches.length > 1) {
throw new Error('Internal error: `makeArrowTable` unexpectedly created a table with more than one batch')
throw new Error(
"Internal error: `makeArrowTable` unexpectedly created a table with more than one batch"
);
}
const values = sourceColumn.toArray()
const vectors = await embeddings.embed(values as T[])
const values = sourceColumn.toArray();
const vectors = await embeddings.embed(values as T[]);
if (vectors.length !== values.length) {
throw new Error('Embedding function did not return an embedding for each input element')
throw new Error(
"Embedding function did not return an embedding for each input element"
);
}
const destType = newVectorType(vectors[0].length, innerDestType)
newColumns[destColumn] = makeVector(vectors, destType)
const destType = newVectorType(vectors[0].length, innerDestType);
newColumns[destColumn] = makeVector(vectors, destType);
}
const newTable = new ArrowTable(newColumns)
const newTable = new ArrowTable(newColumns);
if (schema != null) {
if (schema.fields.find(f => f.name === destColumn) === undefined) {
throw new Error(`When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`)
if (schema.fields.find((f) => f.name === destColumn) === undefined) {
throw new Error(
`When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`
);
}
return alignTable(newTable, schema)
return alignTable(newTable, schema);
}
return newTable
return newTable;
}
/*
@@ -417,21 +462,24 @@ async function applyEmbeddings<T> (table: ArrowTable, embeddings?: EmbeddingFunc
* embedding columns. If no schema is provded then embedding columns will
* be placed at the end of the table, after all of the input columns.
*/
export async function convertToTable<T> (
export async function convertToTable<T>(
data: Array<Record<string, unknown>>,
embeddings?: EmbeddingFunction<T>,
makeTableOptions?: Partial<MakeArrowTableOptions>
): Promise<ArrowTable> {
const table = makeArrowTable(data, makeTableOptions)
return await applyEmbeddings(table, embeddings, makeTableOptions?.schema)
const table = makeArrowTable(data, makeTableOptions);
return await applyEmbeddings(table, embeddings, makeTableOptions?.schema);
}
// Creates the Arrow Type for a Vector column with dimension `dim`
function newVectorType <T extends Float> (dim: number, innerType: T): FixedSizeList<T> {
function newVectorType<T extends Float>(
dim: number,
innerType: T
): FixedSizeList<T> {
// Somewhere we always default to have the elements nullable, so we need to set it to true
// otherwise we often get schema mismatches because the stored data always has schema with nullable elements
const children = new Field<T>('item', innerType, true)
return new FixedSizeList(dim, children)
const children = new Field<T>("item", innerType, true);
return new FixedSizeList(dim, children);
}
/**
@@ -441,17 +489,17 @@ function newVectorType <T extends Float> (dim: number, innerType: T): FixedSizeL
*
* `schema` is required if data is empty
*/
export async function fromRecordsToBuffer<T> (
export async function fromRecordsToBuffer<T>(
data: Array<Record<string, unknown>>,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
): Promise<Buffer> {
if (schema !== undefined && schema !== null) {
schema = sanitizeSchema(schema)
schema = sanitizeSchema(schema);
}
const table = await convertToTable(data, embeddings, { schema })
const writer = RecordBatchFileWriter.writeAll(table)
return Buffer.from(await writer.toUint8Array())
const table = await convertToTable(data, embeddings, { schema, embeddings });
const writer = RecordBatchFileWriter.writeAll(table);
return Buffer.from(await writer.toUint8Array());
}
/**
@@ -461,17 +509,17 @@ export async function fromRecordsToBuffer<T> (
*
* `schema` is required if data is empty
*/
export async function fromRecordsToStreamBuffer<T> (
export async function fromRecordsToStreamBuffer<T>(
data: Array<Record<string, unknown>>,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
): Promise<Buffer> {
if (schema !== null && schema !== undefined) {
schema = sanitizeSchema(schema)
schema = sanitizeSchema(schema);
}
const table = await convertToTable(data, embeddings, { schema })
const writer = RecordBatchStreamWriter.writeAll(table)
return Buffer.from(await writer.toUint8Array())
const table = await convertToTable(data, embeddings, { schema });
const writer = RecordBatchStreamWriter.writeAll(table);
return Buffer.from(await writer.toUint8Array());
}
/**
@@ -482,17 +530,17 @@ export async function fromRecordsToStreamBuffer<T> (
*
* `schema` is required if the table is empty
*/
export async function fromTableToBuffer<T> (
export async function fromTableToBuffer<T>(
table: ArrowTable,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
): Promise<Buffer> {
if (schema !== null && schema !== undefined) {
schema = sanitizeSchema(schema)
schema = sanitizeSchema(schema);
}
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema)
const writer = RecordBatchFileWriter.writeAll(tableWithEmbeddings)
return Buffer.from(await writer.toUint8Array())
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
const writer = RecordBatchFileWriter.writeAll(tableWithEmbeddings);
return Buffer.from(await writer.toUint8Array());
}
/**
@@ -503,49 +551,87 @@ export async function fromTableToBuffer<T> (
*
* `schema` is required if the table is empty
*/
export async function fromTableToStreamBuffer<T> (
export async function fromTableToStreamBuffer<T>(
table: ArrowTable,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
): Promise<Buffer> {
if (schema !== null && schema !== undefined) {
schema = sanitizeSchema(schema)
schema = sanitizeSchema(schema);
}
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema)
const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings)
return Buffer.from(await writer.toUint8Array())
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
return Buffer.from(await writer.toUint8Array());
}
function alignBatch (batch: RecordBatch, schema: Schema): RecordBatch {
const alignedChildren = []
function alignBatch(batch: RecordBatch, schema: Schema): RecordBatch {
const alignedChildren = [];
for (const field of schema.fields) {
const indexInBatch = batch.schema.fields?.findIndex(
(f) => f.name === field.name
)
);
if (indexInBatch < 0) {
throw new Error(
`The column ${field.name} was not found in the Arrow Table`
)
);
}
alignedChildren.push(batch.data.children[indexInBatch])
alignedChildren.push(batch.data.children[indexInBatch]);
}
const newData = makeData({
type: new Struct(schema.fields),
length: batch.numRows,
nullCount: batch.nullCount,
children: alignedChildren
})
return new RecordBatch(schema, newData)
});
return new RecordBatch(schema, newData);
}
function alignTable (table: ArrowTable, schema: Schema): ArrowTable {
function alignTable(table: ArrowTable, schema: Schema): ArrowTable {
const alignedBatches = table.batches.map((batch) =>
alignBatch(batch, schema)
)
return new ArrowTable(schema, alignedBatches)
);
return new ArrowTable(schema, alignedBatches);
}
// Creates an empty Arrow Table
export function createEmptyTable (schema: Schema): ArrowTable {
return new ArrowTable(sanitizeSchema(schema))
export function createEmptyTable(schema: Schema): ArrowTable {
return new ArrowTable(sanitizeSchema(schema));
}
function validateSchemaEmbeddings(
schema: Schema<any>,
data: Array<Record<string, unknown>>,
embeddings: EmbeddingFunction<any> | undefined
) {
const fields = [];
const missingEmbeddingFields = [];
// First we check if the field is a `FixedSizeList`
// Then we check if the data contains the field
// if it does not, we add it to the list of missing embedding fields
// Finally, we check if those missing embedding fields are `this._embeddings`
// if they are not, we throw an error
for (const field of schema.fields) {
if (field.type instanceof FixedSizeList) {
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
missingEmbeddingFields.push(field);
} else {
fields.push(field);
}
} else {
fields.push(field);
}
}
if (missingEmbeddingFields.length > 0 && embeddings === undefined) {
console.log({ missingEmbeddingFields, embeddings });
throw new Error(
`Table has embeddings: "${missingEmbeddingFields
.map((f) => f.name)
.join(",")}", but no embedding function was provided`
);
}
return new Schema(fields);
}
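For contrast, a sketch of the non-error path: when an `EmbeddingFunction` is passed via the new `embeddings` option, `validateSchemaEmbeddings` treats vector columns that are absent from the records as columns the embedding function will produce and simply drops them from the validated schema; the vectors are computed later, when the embeddings are applied (e.g. by `convertToTable` or `fromTableToBuffer`). The embedding function below is a toy stand-in (a real one would call a model), and the package name `vectordb` and the 128-dimensional column are assumptions carried over from the sketch near the top of this page.

```ts
import { Schema, Field, Float32, FixedSizeList, Utf8 } from "apache-arrow";
import { type EmbeddingFunction, makeArrowTable } from "vectordb";

const schema = new Schema([
  new Field("text", new Utf8(), true),
  new Field(
    "vector",
    new FixedSizeList(128, new Field("item", new Float32(), true)),
    true
  )
]);

// Toy embedding function: maps each string to a fixed 128-dim vector.
const embedding: EmbeddingFunction<string> = {
  sourceColumn: "text",
  embed: async (texts: string[]) => texts.map(() => new Array(128).fill(0.5))
};

// "vector" is declared in the schema but absent from the records. Because
// `embeddings` is provided, validateSchemaEmbeddings drops the missing
// column instead of throwing.
const table = makeArrowTable(
  [{ text: "hello" }, { text: "world" }],
  { schema, embeddings: embedding }
);

console.log(table.schema.names); // ["text"]: the missing vector column was dropped
```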


@@ -12,19 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.
import { type Schema, Table as ArrowTable, tableFromIPC } from 'apache-arrow'
import { type Schema, Table as ArrowTable, tableFromIPC } from "apache-arrow";
import {
createEmptyTable,
fromRecordsToBuffer,
fromTableToBuffer,
makeArrowTable
} from './arrow'
import type { EmbeddingFunction } from './embedding/embedding_function'
import { RemoteConnection } from './remote'
import { Query } from './query'
import { isEmbeddingFunction } from './embedding/embedding_function'
import { type Literal, toSQL } from './util'
import { type HttpMiddleware } from './middleware'
} from "./arrow";
import type { EmbeddingFunction } from "./embedding/embedding_function";
import { RemoteConnection } from "./remote";
import { Query } from "./query";
import { isEmbeddingFunction } from "./embedding/embedding_function";
import { type Literal, toSQL } from "./util";
import { type HttpMiddleware } from "./middleware";
const {
databaseNew,
@@ -48,14 +49,18 @@ const {
tableAlterColumns,
tableDropColumns
// eslint-disable-next-line @typescript-eslint/no-var-requires
} = require('../native.js')
} = require("../native.js");
export { Query }
export type { EmbeddingFunction }
export { OpenAIEmbeddingFunction } from './embedding/openai'
export { convertToTable, makeArrowTable, type MakeArrowTableOptions } from './arrow'
export { Query };
export type { EmbeddingFunction };
export { OpenAIEmbeddingFunction } from "./embedding/openai";
export {
convertToTable,
makeArrowTable,
type MakeArrowTableOptions
} from "./arrow";
const defaultAwsRegion = 'us-west-2'
const defaultAwsRegion = "us-west-2";
export interface AwsCredentials {
accessKeyId: string
@@ -128,19 +133,19 @@ export interface ConnectionOptions {
readConsistencyInterval?: number
}
function getAwsArgs (opts: ConnectionOptions): any[] {
const callArgs: any[] = []
const awsCredentials = opts.awsCredentials
function getAwsArgs(opts: ConnectionOptions): any[] {
const callArgs: any[] = [];
const awsCredentials = opts.awsCredentials;
if (awsCredentials !== undefined) {
callArgs.push(awsCredentials.accessKeyId)
callArgs.push(awsCredentials.secretKey)
callArgs.push(awsCredentials.sessionToken)
callArgs.push(awsCredentials.accessKeyId);
callArgs.push(awsCredentials.secretKey);
callArgs.push(awsCredentials.sessionToken);
} else {
callArgs.fill(undefined, 0, 3)
callArgs.fill(undefined, 0, 3);
}
callArgs.push(opts.awsRegion)
return callArgs
callArgs.push(opts.awsRegion);
return callArgs;
}
export interface CreateTableOptions<T> {
@@ -173,56 +178,56 @@ export interface CreateTableOptions<T> {
*
* @see {@link ConnectionOptions} for more details on the URI format.
*/
export async function connect (uri: string): Promise<Connection>
export async function connect(uri: string): Promise<Connection>;
/**
* Connect to a LanceDB instance with connection options.
*
* @param opts The {@link ConnectionOptions} to use when connecting to the database.
*/
export async function connect (
export async function connect(
opts: Partial<ConnectionOptions>
): Promise<Connection>
export async function connect (
): Promise<Connection>;
export async function connect(
arg: string | Partial<ConnectionOptions>
): Promise<Connection> {
let opts: ConnectionOptions
if (typeof arg === 'string') {
opts = { uri: arg }
let opts: ConnectionOptions;
if (typeof arg === "string") {
opts = { uri: arg };
} else {
const keys = Object.keys(arg)
if (keys.length === 1 && keys[0] === 'uri' && typeof arg.uri === 'string') {
opts = { uri: arg.uri }
const keys = Object.keys(arg);
if (keys.length === 1 && keys[0] === "uri" && typeof arg.uri === "string") {
opts = { uri: arg.uri };
} else {
opts = Object.assign(
{
uri: '',
uri: "",
awsCredentials: undefined,
awsRegion: defaultAwsRegion,
apiKey: undefined,
region: defaultAwsRegion
},
arg
)
);
}
}
if (opts.uri.startsWith('db://')) {
if (opts.uri.startsWith("db://")) {
// Remote connection
return new RemoteConnection(opts)
return new RemoteConnection(opts);
}
const storageOptions = opts.storageOptions ?? {};
if (opts.awsCredentials?.accessKeyId !== undefined) {
storageOptions.aws_access_key_id = opts.awsCredentials.accessKeyId
storageOptions.aws_access_key_id = opts.awsCredentials.accessKeyId;
}
if (opts.awsCredentials?.secretKey !== undefined) {
storageOptions.aws_secret_access_key = opts.awsCredentials.secretKey
storageOptions.aws_secret_access_key = opts.awsCredentials.secretKey;
}
if (opts.awsCredentials?.sessionToken !== undefined) {
storageOptions.aws_session_token = opts.awsCredentials.sessionToken
storageOptions.aws_session_token = opts.awsCredentials.sessionToken;
}
if (opts.awsRegion !== undefined) {
storageOptions.region = opts.awsRegion
storageOptions.region = opts.awsRegion;
}
// It's a pain to pass a record to Rust, so we convert it to an array of key-value pairs
const storageOptionsArr = Object.entries(storageOptions);
@@ -231,8 +236,8 @@ export async function connect (
opts.uri,
storageOptionsArr,
opts.readConsistencyInterval
)
return new LocalConnection(db, opts)
);
return new LocalConnection(db, opts);
}
/**
@@ -533,7 +538,11 @@ export interface Table<T = number[]> {
* @param data the new data to insert
* @param args parameters controlling how the operation should behave
*/
mergeInsert: (on: string, data: Array<Record<string, unknown>> | ArrowTable, args: MergeInsertArgs) => Promise<void>
mergeInsert: (
on: string,
data: Array<Record<string, unknown>> | ArrowTable,
args: MergeInsertArgs
) => Promise<void>
/**
* List the indicies on this table.
@@ -558,7 +567,9 @@ export interface Table<T = number[]> {
* expressions will be evaluated for each row in the
* table, and can reference existing columns in the table.
*/
addColumns(newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void>
addColumns(
newColumnTransforms: Array<{ name: string, valueSql: string }>
): Promise<void>
/**
* Alter the name or nullability of columns.
@@ -699,23 +710,23 @@ export interface IndexStats {
* A connection to a LanceDB database.
*/
export class LocalConnection implements Connection {
private readonly _options: () => ConnectionOptions
private readonly _db: any
private readonly _options: () => ConnectionOptions;
private readonly _db: any;
constructor (db: any, options: ConnectionOptions) {
this._options = () => options
this._db = db
constructor(db: any, options: ConnectionOptions) {
this._options = () => options;
this._db = db;
}
get uri (): string {
return this._options().uri
get uri(): string {
return this._options().uri;
}
/**
* Get the names of all tables in the database.
*/
async tableNames (): Promise<string[]> {
return databaseTableNames.call(this._db)
async tableNames(): Promise<string[]> {
return databaseTableNames.call(this._db);
}
/**
@@ -723,7 +734,7 @@ export class LocalConnection implements Connection {
*
* @param name The name of the table.
*/
async openTable (name: string): Promise<Table>
async openTable(name: string): Promise<Table>;
/**
* Open a table in the database.
@@ -734,23 +745,20 @@ export class LocalConnection implements Connection {
async openTable<T>(
name: string,
embeddings: EmbeddingFunction<T>
): Promise<Table<T>>
): Promise<Table<T>>;
async openTable<T>(
name: string,
embeddings?: EmbeddingFunction<T>
): Promise<Table<T>>
): Promise<Table<T>>;
async openTable<T>(
name: string,
embeddings?: EmbeddingFunction<T>
): Promise<Table<T>> {
const tbl = await databaseOpenTable.call(
this._db,
name,
)
const tbl = await databaseOpenTable.call(this._db, name);
if (embeddings !== undefined) {
return new LocalTable(tbl, name, this._options(), embeddings)
return new LocalTable(tbl, name, this._options(), embeddings);
} else {
return new LocalTable(tbl, name, this._options())
return new LocalTable(tbl, name, this._options());
}
}
@@ -760,32 +768,32 @@ export class LocalConnection implements Connection {
optsOrEmbedding?: WriteOptions | EmbeddingFunction<T>,
opt?: WriteOptions
): Promise<Table<T>> {
if (typeof name === 'string') {
let writeOptions: WriteOptions = new DefaultWriteOptions()
if (typeof name === "string") {
let writeOptions: WriteOptions = new DefaultWriteOptions();
if (opt !== undefined && isWriteOptions(opt)) {
writeOptions = opt
writeOptions = opt;
} else if (
optsOrEmbedding !== undefined &&
isWriteOptions(optsOrEmbedding)
) {
writeOptions = optsOrEmbedding
writeOptions = optsOrEmbedding;
}
let embeddings: undefined | EmbeddingFunction<T>
let embeddings: undefined | EmbeddingFunction<T>;
if (
optsOrEmbedding !== undefined &&
isEmbeddingFunction(optsOrEmbedding)
) {
embeddings = optsOrEmbedding
embeddings = optsOrEmbedding;
}
return await this.createTableImpl({
name,
data,
embeddingFunction: embeddings,
writeOptions
})
});
}
return await this.createTableImpl(name)
return await this.createTableImpl(name);
}
private async createTableImpl<T>({
@@ -801,27 +809,27 @@ export class LocalConnection implements Connection {
embeddingFunction?: EmbeddingFunction<T> | undefined
writeOptions?: WriteOptions | undefined
}): Promise<Table<T>> {
let buffer: Buffer
let buffer: Buffer;
function isEmpty (
function isEmpty(
data: Array<Record<string, unknown>> | ArrowTable<any>
): boolean {
if (data instanceof ArrowTable) {
return data.data.length === 0
return data.data.length === 0;
}
return data.length === 0
return data.length === 0;
}
if (data === undefined || isEmpty(data)) {
if (schema === undefined) {
throw new Error('Either data or schema needs to defined')
throw new Error("Either data or schema needs to defined");
}
buffer = await fromTableToBuffer(createEmptyTable(schema))
buffer = await fromTableToBuffer(createEmptyTable(schema));
} else if (data instanceof ArrowTable) {
buffer = await fromTableToBuffer(data, embeddingFunction, schema)
buffer = await fromTableToBuffer(data, embeddingFunction, schema);
} else {
// data is Array<Record<...>>
buffer = await fromRecordsToBuffer(data, embeddingFunction, schema)
buffer = await fromRecordsToBuffer(data, embeddingFunction, schema);
}
const tbl = await tableCreate.call(
@@ -830,11 +838,11 @@ export class LocalConnection implements Connection {
buffer,
writeOptions?.writeMode?.toString(),
...getAwsArgs(this._options())
)
);
if (embeddingFunction !== undefined) {
return new LocalTable(tbl, name, this._options(), embeddingFunction)
return new LocalTable(tbl, name, this._options(), embeddingFunction);
} else {
return new LocalTable(tbl, name, this._options())
return new LocalTable(tbl, name, this._options());
}
}
@@ -842,69 +850,69 @@ export class LocalConnection implements Connection {
* Drop an existing table.
* @param name The name of the table to drop.
*/
async dropTable (name: string): Promise<void> {
await databaseDropTable.call(this._db, name)
async dropTable(name: string): Promise<void> {
await databaseDropTable.call(this._db, name);
}
withMiddleware (middleware: HttpMiddleware): Connection {
return this
withMiddleware(middleware: HttpMiddleware): Connection {
return this;
}
}
export class LocalTable<T = number[]> implements Table<T> {
private _tbl: any
private readonly _name: string
private readonly _isElectron: boolean
private readonly _embeddings?: EmbeddingFunction<T>
private readonly _options: () => ConnectionOptions
private _tbl: any;
private readonly _name: string;
private readonly _isElectron: boolean;
private readonly _embeddings?: EmbeddingFunction<T>;
private readonly _options: () => ConnectionOptions;
constructor (tbl: any, name: string, options: ConnectionOptions)
constructor(tbl: any, name: string, options: ConnectionOptions);
/**
* @param tbl
* @param name
* @param options
* @param embeddings An embedding function to use when interacting with this table
*/
constructor (
constructor(
tbl: any,
name: string,
options: ConnectionOptions,
embeddings: EmbeddingFunction<T>
)
constructor (
);
constructor(
tbl: any,
name: string,
options: ConnectionOptions,
embeddings?: EmbeddingFunction<T>
) {
this._tbl = tbl
this._name = name
this._embeddings = embeddings
this._options = () => options
this._isElectron = this.checkElectron()
this._tbl = tbl;
this._name = name;
this._embeddings = embeddings;
this._options = () => options;
this._isElectron = this.checkElectron();
}
get name (): string {
return this._name
get name(): string {
return this._name;
}
/**
* Creates a search query to find the nearest neighbors of the given search term
* @param query The query search term
*/
search (query: T): Query<T> {
return new Query(query, this._tbl, this._embeddings)
search(query: T): Query<T> {
return new Query(query, this._tbl, this._embeddings);
}
/**
* Creates a filter query to find all rows matching the specified criteria
* @param value The filter criteria (like SQL where clause syntax)
*/
filter (value: string): Query<T> {
return new Query(undefined, this._tbl, this._embeddings).filter(value)
filter(value: string): Query<T> {
return new Query(undefined, this._tbl, this._embeddings).filter(value);
}
where = this.filter
where = this.filter;
/**
* Insert records into this Table.
@@ -912,16 +920,19 @@ export class LocalTable<T = number[]> implements Table<T> {
* @param data Records to be inserted into the Table
* @return The number of rows added to the table
*/
async add (
async add(
data: Array<Record<string, unknown>> | ArrowTable
): Promise<number> {
const schema = await this.schema
let tbl: ArrowTable
const schema = await this.schema;
let tbl: ArrowTable;
if (data instanceof ArrowTable) {
tbl = data
tbl = data;
} else {
tbl = makeArrowTable(data, { schema })
tbl = makeArrowTable(data, { schema, embeddings: this._embeddings });
}
return tableAdd
.call(
this._tbl,
@@ -930,8 +941,8 @@ export class LocalTable<T = number[]> implements Table<T> {
...getAwsArgs(this._options())
)
.then((newTable: any) => {
this._tbl = newTable
})
this._tbl = newTable;
});
}
/**
@@ -940,14 +951,14 @@ export class LocalTable<T = number[]> implements Table<T> {
* @param data Records to be inserted into the Table
* @return The number of rows added to the table
*/
async overwrite (
async overwrite(
data: Array<Record<string, unknown>> | ArrowTable
): Promise<number> {
let buffer: Buffer
let buffer: Buffer;
if (data instanceof ArrowTable) {
buffer = await fromTableToBuffer(data, this._embeddings)
buffer = await fromTableToBuffer(data, this._embeddings);
} else {
buffer = await fromRecordsToBuffer(data, this._embeddings)
buffer = await fromRecordsToBuffer(data, this._embeddings);
}
return tableAdd
.call(
@@ -957,8 +968,8 @@ export class LocalTable<T = number[]> implements Table<T> {
...getAwsArgs(this._options())
)
.then((newTable: any) => {
this._tbl = newTable
})
this._tbl = newTable;
});
}
/**
@@ -966,26 +977,26 @@ export class LocalTable<T = number[]> implements Table<T> {
*
* @param indexParams The parameters of this Index, @see VectorIndexParams.
*/
async createIndex (indexParams: VectorIndexParams): Promise<any> {
async createIndex(indexParams: VectorIndexParams): Promise<any> {
return tableCreateVectorIndex
.call(this._tbl, indexParams)
.then((newTable: any) => {
this._tbl = newTable
})
this._tbl = newTable;
});
}
async createScalarIndex (column: string, replace?: boolean): Promise<void> {
async createScalarIndex(column: string, replace?: boolean): Promise<void> {
if (replace === undefined) {
replace = true
replace = true;
}
return tableCreateScalarIndex.call(this._tbl, column, replace)
return tableCreateScalarIndex.call(this._tbl, column, replace);
}
/**
* Returns the number of rows in this table.
*/
async countRows (filter?: string): Promise<number> {
return tableCountRows.call(this._tbl, filter)
async countRows(filter?: string): Promise<number> {
return tableCountRows.call(this._tbl, filter);
}
/**
@@ -993,10 +1004,10 @@ export class LocalTable<T = number[]> implements Table<T> {
*
* @param filter A filter in the same format used by a sql WHERE clause.
*/
async delete (filter: string): Promise<void> {
async delete(filter: string): Promise<void> {
return tableDelete.call(this._tbl, filter).then((newTable: any) => {
this._tbl = newTable
})
this._tbl = newTable;
});
}
/**
@@ -1006,55 +1017,65 @@ export class LocalTable<T = number[]> implements Table<T> {
*
* @returns
*/
async update (args: UpdateArgs | UpdateSqlArgs): Promise<void> {
let filter: string | null
let updates: Record<string, string>
async update(args: UpdateArgs | UpdateSqlArgs): Promise<void> {
let filter: string | null;
let updates: Record<string, string>;
if ('valuesSql' in args) {
filter = args.where ?? null
updates = args.valuesSql
if ("valuesSql" in args) {
filter = args.where ?? null;
updates = args.valuesSql;
} else {
filter = args.where ?? null
updates = {}
filter = args.where ?? null;
updates = {};
for (const [key, value] of Object.entries(args.values)) {
updates[key] = toSQL(value)
updates[key] = toSQL(value);
}
}
return tableUpdate
.call(this._tbl, filter, updates)
.then((newTable: any) => {
this._tbl = newTable
})
this._tbl = newTable;
});
}
async mergeInsert (on: string, data: Array<Record<string, unknown>> | ArrowTable, args: MergeInsertArgs): Promise<void> {
let whenMatchedUpdateAll = false
let whenMatchedUpdateAllFilt = null
if (args.whenMatchedUpdateAll !== undefined && args.whenMatchedUpdateAll !== null) {
whenMatchedUpdateAll = true
async mergeInsert(
on: string,
data: Array<Record<string, unknown>> | ArrowTable,
args: MergeInsertArgs
): Promise<void> {
let whenMatchedUpdateAll = false;
let whenMatchedUpdateAllFilt = null;
if (
args.whenMatchedUpdateAll !== undefined &&
args.whenMatchedUpdateAll !== null
) {
whenMatchedUpdateAll = true;
if (args.whenMatchedUpdateAll !== true) {
whenMatchedUpdateAllFilt = args.whenMatchedUpdateAll
whenMatchedUpdateAllFilt = args.whenMatchedUpdateAll;
}
}
const whenNotMatchedInsertAll = args.whenNotMatchedInsertAll ?? false
let whenNotMatchedBySourceDelete = false
let whenNotMatchedBySourceDeleteFilt = null
if (args.whenNotMatchedBySourceDelete !== undefined && args.whenNotMatchedBySourceDelete !== null) {
whenNotMatchedBySourceDelete = true
const whenNotMatchedInsertAll = args.whenNotMatchedInsertAll ?? false;
let whenNotMatchedBySourceDelete = false;
let whenNotMatchedBySourceDeleteFilt = null;
if (
args.whenNotMatchedBySourceDelete !== undefined &&
args.whenNotMatchedBySourceDelete !== null
) {
whenNotMatchedBySourceDelete = true;
if (args.whenNotMatchedBySourceDelete !== true) {
whenNotMatchedBySourceDeleteFilt = args.whenNotMatchedBySourceDelete
whenNotMatchedBySourceDeleteFilt = args.whenNotMatchedBySourceDelete;
}
}
const schema = await this.schema
let tbl: ArrowTable
const schema = await this.schema;
let tbl: ArrowTable;
if (data instanceof ArrowTable) {
tbl = data
tbl = data;
} else {
tbl = makeArrowTable(data, { schema })
tbl = makeArrowTable(data, { schema });
}
const buffer = await fromTableToBuffer(tbl, this._embeddings, schema)
const buffer = await fromTableToBuffer(tbl, this._embeddings, schema);
this._tbl = await tableMergeInsert.call(
this._tbl,
@@ -1065,7 +1086,7 @@ export class LocalTable<T = number[]> implements Table<T> {
whenNotMatchedBySourceDelete,
whenNotMatchedBySourceDeleteFilt,
buffer
)
);
}
/**
@@ -1083,16 +1104,16 @@ export class LocalTable<T = number[]> implements Table<T> {
* uphold this promise can lead to corrupted tables.
* @returns
*/
async cleanupOldVersions (
async cleanupOldVersions(
olderThan?: number,
deleteUnverified?: boolean
): Promise<CleanupStats> {
return tableCleanupOldVersions
.call(this._tbl, olderThan, deleteUnverified)
.then((res: { newTable: any, metrics: CleanupStats }) => {
this._tbl = res.newTable
return res.metrics
})
this._tbl = res.newTable;
return res.metrics;
});
}
/**
@@ -1106,62 +1127,64 @@ export class LocalTable<T = number[]> implements Table<T> {
* for most tables.
* @returns Metrics about the compaction operation.
*/
async compactFiles (options?: CompactionOptions): Promise<CompactionMetrics> {
const optionsArg = options ?? {}
async compactFiles(options?: CompactionOptions): Promise<CompactionMetrics> {
const optionsArg = options ?? {};
return tableCompactFiles
.call(this._tbl, optionsArg)
.then((res: { newTable: any, metrics: CompactionMetrics }) => {
this._tbl = res.newTable
return res.metrics
})
this._tbl = res.newTable;
return res.metrics;
});
}
async listIndices (): Promise<VectorIndex[]> {
return tableListIndices.call(this._tbl)
async listIndices(): Promise<VectorIndex[]> {
return tableListIndices.call(this._tbl);
}
async indexStats (indexUuid: string): Promise<IndexStats> {
return tableIndexStats.call(this._tbl, indexUuid)
async indexStats(indexUuid: string): Promise<IndexStats> {
return tableIndexStats.call(this._tbl, indexUuid);
}
get schema (): Promise<Schema> {
get schema(): Promise<Schema> {
// empty table
return this.getSchema()
return this.getSchema();
}
private async getSchema (): Promise<Schema> {
const buffer = await tableSchema.call(this._tbl, this._isElectron)
const table = tableFromIPC(buffer)
return table.schema
private async getSchema(): Promise<Schema> {
const buffer = await tableSchema.call(this._tbl, this._isElectron);
const table = tableFromIPC(buffer);
return table.schema;
}
// See https://github.com/electron/electron/issues/2288
private checkElectron (): boolean {
private checkElectron(): boolean {
try {
// eslint-disable-next-line no-prototype-builtins
return (
Object.prototype.hasOwnProperty.call(process?.versions, 'electron') ||
navigator?.userAgent?.toLowerCase()?.includes(' electron')
)
Object.prototype.hasOwnProperty.call(process?.versions, "electron") ||
navigator?.userAgent?.toLowerCase()?.includes(" electron")
);
} catch (e) {
return false
return false;
}
}
async addColumns (newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void> {
return tableAddColumns.call(this._tbl, newColumnTransforms)
async addColumns(
newColumnTransforms: Array<{ name: string, valueSql: string }>
): Promise<void> {
return tableAddColumns.call(this._tbl, newColumnTransforms);
}
async alterColumns (columnAlterations: ColumnAlteration[]): Promise<void> {
return tableAlterColumns.call(this._tbl, columnAlterations)
async alterColumns(columnAlterations: ColumnAlteration[]): Promise<void> {
return tableAlterColumns.call(this._tbl, columnAlterations);
}
async dropColumns (columnNames: string[]): Promise<void> {
return tableDropColumns.call(this._tbl, columnNames)
async dropColumns(columnNames: string[]): Promise<void> {
return tableDropColumns.call(this._tbl, columnNames);
}
withMiddleware (middleware: HttpMiddleware): Table<T> {
return this
withMiddleware(middleware: HttpMiddleware): Table<T> {
return this;
}
}
@@ -1184,7 +1207,7 @@ export interface CompactionOptions {
*/
targetRowsPerFragment?: number
/**
* The maximum number of rows per group. Defaults to 1024.
* The maximum number of T per group. Defaults to 1024.
*/
maxRowsPerGroup?: number
/**
@@ -1284,21 +1307,21 @@ export interface IvfPQIndexConfig {
*/
index_cache_size?: number
type: 'ivf_pq'
type: "ivf_pq"
}
export type VectorIndexParams = IvfPQIndexConfig
export type VectorIndexParams = IvfPQIndexConfig;
/**
* Write mode for writing a table.
*/
export enum WriteMode {
/** Create a new {@link Table}. */
Create = 'create',
Create = "create",
/** Overwrite the existing {@link Table} if presented. */
Overwrite = 'overwrite',
Overwrite = "overwrite",
/** Append new data to the table. */
Append = 'append',
Append = "append",
}
/**
@@ -1310,14 +1333,14 @@ export interface WriteOptions {
}
export class DefaultWriteOptions implements WriteOptions {
writeMode = WriteMode.Create
writeMode = WriteMode.Create;
}
export function isWriteOptions (value: any): value is WriteOptions {
export function isWriteOptions(value: any): value is WriteOptions {
return (
Object.keys(value).length === 1 &&
(value.writeMode === undefined || typeof value.writeMode === 'string')
)
(value.writeMode === undefined || typeof value.writeMode === "string")
);
}
/**
@@ -1327,15 +1350,15 @@ export enum MetricType {
/**
* Euclidean distance
*/
L2 = 'l2',
L2 = "l2",
/**
* Cosine distance
*/
Cosine = 'cosine',
Cosine = "cosine",
/**
* Dot product
*/
Dot = 'dot',
Dot = "dot",
}
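On the `index.ts` side, the main behavioral change is that `LocalTable.add` now forwards the table's embedding function into `makeArrowTable` (`{ schema, embeddings: this._embeddings }`), so appending records that omit the vector column passes the new schema validation and the vectors are generated when the data is serialized. A rough end-to-end sketch follows; the database path, table name, and toy embedding function are illustrative, not taken from this repository.

```ts
import { connect, type EmbeddingFunction } from "vectordb";

// Toy embedding function from the earlier sketch (illustrative only).
const embedding: EmbeddingFunction<string> = {
  sourceColumn: "text",
  embed: async (texts: string[]) => texts.map(() => new Array(128).fill(0.5))
};

const db = await connect("data/sample-lancedb");
const tbl = await db.createTable("docs", [{ text: "hello" }], embedding);

// The appended records carry only the source column; the vector column is
// produced by the embedding function when the data is converted to Arrow.
await tbl.add([{ text: "another document" }]);
```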


@@ -32,7 +32,7 @@ import {
Bool,
Date_,
Decimal,
DataType,
type DataType,
Dictionary,
Binary,
Float32,
@@ -74,12 +74,12 @@ import {
DurationNanosecond,
DurationMicrosecond,
DurationMillisecond,
DurationSecond,
DurationSecond
} from "apache-arrow";
import type { IntBitWidth, TimeBitWidth } from "apache-arrow/type";
function sanitizeMetadata(
metadataLike?: unknown,
metadataLike?: unknown
): Map<string, string> | undefined {
if (metadataLike === undefined || metadataLike === null) {
return undefined;
@@ -90,7 +90,7 @@ function sanitizeMetadata(
for (const item of metadataLike) {
if (!(typeof item[0] === "string" || !(typeof item[1] === "string"))) {
throw Error(
"Expected metadata, if present, to be a Map<string, string> but it had non-string keys or values",
"Expected metadata, if present, to be a Map<string, string> but it had non-string keys or values"
);
}
}
@@ -105,7 +105,7 @@ function sanitizeInt(typeLike: object) {
typeof typeLike.isSigned !== "boolean"
) {
throw Error(
"Expected an Int Type to have a `bitWidth` and `isSigned` property",
"Expected an Int Type to have a `bitWidth` and `isSigned` property"
);
}
return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth);
@@ -128,7 +128,7 @@ function sanitizeDecimal(typeLike: object) {
typeof typeLike.bitWidth !== "number"
) {
throw Error(
"Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties",
"Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties"
);
}
return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
@@ -149,7 +149,7 @@ function sanitizeTime(typeLike: object) {
typeof typeLike.bitWidth !== "number"
) {
throw Error(
"Expected a Time type to have `unit` and `bitWidth` properties",
"Expected a Time type to have `unit` and `bitWidth` properties"
);
}
return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth);
@@ -172,7 +172,7 @@ function sanitizeTypedTimestamp(
| typeof TimestampNanosecond
| typeof TimestampMicrosecond
| typeof TimestampMillisecond
| typeof TimestampSecond,
| typeof TimestampSecond
) {
let timezone = null;
if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
@@ -191,7 +191,7 @@ function sanitizeInterval(typeLike: object) {
function sanitizeList(typeLike: object) {
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a List type to have an array-like `children` property",
"Expected a List type to have an array-like `children` property"
);
}
if (typeLike.children.length !== 1) {
@@ -203,7 +203,7 @@ function sanitizeList(typeLike: object) {
function sanitizeStruct(typeLike: object) {
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a Struct type to have an array-like `children` property",
"Expected a Struct type to have an array-like `children` property"
);
}
return new Struct(typeLike.children.map((child) => sanitizeField(child)));
@@ -216,47 +216,47 @@ function sanitizeUnion(typeLike: object) {
typeof typeLike.mode !== "number"
) {
throw Error(
"Expected a Union type to have `typeIds` and `mode` properties",
"Expected a Union type to have `typeIds` and `mode` properties"
);
}
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a Union type to have an array-like `children` property",
"Expected a Union type to have an array-like `children` property"
);
}
return new Union(
typeLike.mode,
typeLike.typeIds as any,
typeLike.children.map((child) => sanitizeField(child)),
typeLike.children.map((child) => sanitizeField(child))
);
}
function sanitizeTypedUnion(
typeLike: object,
UnionType: typeof DenseUnion | typeof SparseUnion,
UnionType: typeof DenseUnion | typeof SparseUnion
) {
if (!("typeIds" in typeLike)) {
throw Error(
"Expected a DenseUnion/SparseUnion type to have a `typeIds` property",
"Expected a DenseUnion/SparseUnion type to have a `typeIds` property"
);
}
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a DenseUnion/SparseUnion type to have an array-like `children` property",
"Expected a DenseUnion/SparseUnion type to have an array-like `children` property"
);
}
return new UnionType(
typeLike.typeIds as any,
typeLike.children.map((child) => sanitizeField(child)),
typeLike.children.map((child) => sanitizeField(child))
);
}
function sanitizeFixedSizeBinary(typeLike: object) {
if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
throw Error(
"Expected a FixedSizeBinary type to have a `byteWidth` property",
"Expected a FixedSizeBinary type to have a `byteWidth` property"
);
}
return new FixedSizeBinary(typeLike.byteWidth);
@@ -268,7 +268,7 @@ function sanitizeFixedSizeList(typeLike: object) {
}
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a FixedSizeList type to have an array-like `children` property",
"Expected a FixedSizeList type to have an array-like `children` property"
);
}
if (typeLike.children.length !== 1) {
@@ -276,14 +276,14 @@ function sanitizeFixedSizeList(typeLike: object) {
}
return new FixedSizeList(
typeLike.listSize,
sanitizeField(typeLike.children[0]),
sanitizeField(typeLike.children[0])
);
}
function sanitizeMap(typeLike: object) {
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a Map type to have an array-like `children` property",
"Expected a Map type to have an array-like `children` property"
);
}
if (!("keysSorted" in typeLike) || typeof typeLike.keysSorted !== "boolean") {
@@ -291,7 +291,7 @@ function sanitizeMap(typeLike: object) {
}
return new Map_(
typeLike.children.map((field) => sanitizeField(field)) as any,
typeLike.keysSorted,
typeLike.keysSorted
);
}
@@ -319,7 +319,7 @@ function sanitizeDictionary(typeLike: object) {
sanitizeType(typeLike.dictionary),
sanitizeType(typeLike.indices) as any,
typeLike.id,
typeLike.isOrdered,
typeLike.isOrdered
);
}
@@ -454,7 +454,7 @@ function sanitizeField(fieldLike: unknown): Field {
!("nullable" in fieldLike)
) {
throw Error(
"The field passed in is missing a `type`/`name`/`nullable` property",
"The field passed in is missing a `type`/`name`/`nullable` property"
);
}
const type = sanitizeType(fieldLike.type);
@@ -489,7 +489,7 @@ export function sanitizeSchema(schemaLike: unknown): Schema {
}
if (!("fields" in schemaLike)) {
throw Error(
"The schema passed in does not appear to be a schema (no 'fields' property)",
"The schema passed in does not appear to be a schema (no 'fields' property)"
);
}
let metadata;
@@ -498,11 +498,11 @@ export function sanitizeSchema(schemaLike: unknown): Schema {
}
if (!Array.isArray(schemaLike.fields)) {
throw Error(
"The schema passed in had a 'fields' property but it was not an array",
"The schema passed in had a 'fields' property but it was not an array"
);
}
const sanitizedFields = schemaLike.fields.map((field) =>
sanitizeField(field),
sanitizeField(field)
);
return new Schema(sanitizedFields, metadata);
}

File diff suppressed because it is too large.