mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-09 05:12:58 +00:00
fix(node): support specifying arrow field types by name (#2704)
The `FieldLike` type in `nodejs/lancedb/arrow.ts` (lines 71-78 at commit
5ec12c9971) can have a `type: string` property, but before this change,
creating a table whose schema specified field types by name failed with
an error:
```
Error: Expected a Type but object was null/undefined
```
This change adds support for mapping some type name strings to arrow
`DataType`s, so that passing `FieldLike`s with a `type: string` property
to `sanitizeField` does not throw an error.
The accepted type names are the keys of the `constructorsByTypeName`
object, matched case-insensitively (for example `int64`, `Int64`, or
`INT64`). Types that need parameters, such as timestamps (which need a
timezone), cannot be specified by name.
With this, it is possible to create empty tables from `SchemaLike`
objects without instantiating arrow types, e.g.:
```
import { SchemaLike } from "../lancedb/arrow"
// ...
const schemaLike = {
fields: [
{
name: "id",
type: "int64",
nullable: true,
},
{
name: "vector",
type: "float64",
nullable: true,
},
],
// ...
} satisfies SchemaLike;
const table = await con.createEmptyTable("test", schemaLike);
```
This change also makes `FieldLike.nullable` required since the `sanitizeField` function throws if it is undefined.
This commit is contained in:
@@ -73,7 +73,7 @@ export type FieldLike =
|
||||
| {
|
||||
type: string;
|
||||
name: string;
|
||||
nullable?: boolean;
|
||||
nullable: boolean;
|
||||
metadata?: Map<string, string>;
|
||||
};
|
||||
|
||||
|
||||
@@ -326,6 +326,9 @@ export function sanitizeDictionary(typeLike: object) {
|
||||
|
||||
// biome-ignore lint/suspicious/noExplicitAny: skip
|
||||
export function sanitizeType(typeLike: unknown): DataType<any> {
|
||||
if (typeof typeLike === "string") {
|
||||
return dataTypeFromName(typeLike);
|
||||
}
|
||||
if (typeof typeLike !== "object" || typeLike === null) {
|
||||
throw Error("Expected a Type but object was null/undefined");
|
||||
}
|
||||
@@ -447,7 +450,7 @@ export function sanitizeType(typeLike: unknown): DataType<any> {
|
||||
case Type.DurationSecond:
|
||||
return new DurationSecond();
|
||||
default:
|
||||
throw new Error("Unrecoginized type id in schema: " + typeId);
|
||||
throw new Error("Unrecognized type id in schema: " + typeId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -467,7 +470,15 @@ export function sanitizeField(fieldLike: unknown): Field {
|
||||
"The field passed in is missing a `type`/`name`/`nullable` property",
|
||||
);
|
||||
}
|
||||
const type = sanitizeType(fieldLike.type);
|
||||
let type: DataType;
|
||||
try {
|
||||
type = sanitizeType(fieldLike.type);
|
||||
} catch (error: unknown) {
|
||||
throw Error(
|
||||
`Unable to sanitize type for field: ${fieldLike.name} due to error: ${error}`,
|
||||
{ cause: error },
|
||||
);
|
||||
}
|
||||
const name = fieldLike.name;
|
||||
if (!(typeof name === "string")) {
|
||||
throw Error("The field passed in had a non-string `name` property");
|
||||
@@ -581,3 +592,46 @@ function sanitizeData(
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Factories for the Arrow data types that a schema may refer to by name.
 *
 * Every factory calls its constructor without arguments. Keys are lowercase
 * because `dataTypeFromName` lowercases its input before the lookup.
 */
const constructorsByTypeName = {
  null: () => new Null(),
  binary: () => new Binary(),
  utf8: () => new Utf8(),
  bool: () => new Bool(),
  int8: () => new Int8(),
  int16: () => new Int16(),
  int32: () => new Int32(),
  int64: () => new Int64(),
  uint8: () => new Uint8(),
  uint16: () => new Uint16(),
  uint32: () => new Uint32(),
  uint64: () => new Uint64(),
  float16: () => new Float16(),
  float32: () => new Float32(),
  float64: () => new Float64(),
  datemillisecond: () => new DateMillisecond(),
  dateday: () => new DateDay(),
  timenanosecond: () => new TimeNanosecond(),
  timemicrosecond: () => new TimeMicrosecond(),
  timemillisecond: () => new TimeMillisecond(),
  timesecond: () => new TimeSecond(),
  intervaldaytime: () => new IntervalDayTime(),
  intervalyearmonth: () => new IntervalYearMonth(),
  durationnanosecond: () => new DurationNanosecond(),
  durationmicrosecond: () => new DurationMicrosecond(),
  durationmillisecond: () => new DurationMillisecond(),
  durationsecond: () => new DurationSecond(),
} as const;

/** A lowercase type name that has an entry in `constructorsByTypeName`. */
type MappableTypeName = keyof typeof constructorsByTypeName;

/**
 * Check whether `name` is one of the table's own keys.
 *
 * A plain property read would also find members inherited from
 * `Object.prototype` (for example `constructor` or `__proto__`), so the
 * check is restricted to own properties.
 */
function isMappableTypeName(name: string): name is MappableTypeName {
  return Object.prototype.hasOwnProperty.call(constructorsByTypeName, name);
}

/**
 * Create a new Arrow data type from its name.
 *
 * @param typeName - Name of the type. It is lowercased before being matched
 *   against the keys of `constructorsByTypeName`, so matching is
 *   case-insensitive.
 * @returns A newly constructed instance of the matching data type.
 * @throws Error if `typeName` does not match any key of
 *   `constructorsByTypeName`.
 */
export function dataTypeFromName(typeName: string): DataType {
  const normalizedTypeName = typeName.toLowerCase();

  if (!isMappableTypeName(normalizedTypeName)) {
    throw new Error("Unrecognized type name in schema: " + typeName);
  }

  return constructorsByTypeName[normalizedTypeName]();
}
|
||||
|
||||
Reference in New Issue
Block a user