mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-27 08:50:39 +00:00
fix(node): support specifying arrow field types by name (#2704)
The [`FieldLike` type in
arrow.ts](https://github.com/lancedb/lancedb/blob/5ec12c9971/nodejs/lancedb/arrow.ts#L71-L78)
can have a `type: string` property, but before this change, actually
trying to create a table whose schema specified field types by
name resulted in an error:
```
Error: Expected a Type but object was null/undefined
```
This change adds support for mapping some type name strings to arrow
`DataType`s, so that passing `FieldLike`s with a `type: string` property
to `sanitizeField` does not throw an error.
The type names that can be passed are upper/lowercase variations of the
keys of the `constructorsByTypeName` object. This does not support
mapping types that need parameters, such as timestamps which need
timezones.
With this, it is possible to create empty tables from `SchemaLike`
objects without instantiating arrow types, e.g.:
```
import { SchemaLike } from "../lancedb/arrow"
// ...
const schemaLike = {
fields: [
{
name: "id",
type: "int64",
nullable: true,
},
{
name: "vector",
type: "float64",
nullable: true,
},
],
// ...
} satisfies SchemaLike;
const table = await con.createEmptyTable("test", schemaLike);
```
This change also makes `FieldLike.nullable` required since the `sanitizeField` function throws if it is undefined.
This commit is contained in:
@@ -10,7 +10,13 @@ import * as arrow16 from "apache-arrow-16";
|
||||
import * as arrow17 from "apache-arrow-17";
|
||||
import * as arrow18 from "apache-arrow-18";
|
||||
|
||||
import { MatchQuery, PhraseQuery, Table, connect } from "../lancedb";
|
||||
import {
|
||||
Connection,
|
||||
MatchQuery,
|
||||
PhraseQuery,
|
||||
Table,
|
||||
connect,
|
||||
} from "../lancedb";
|
||||
import {
|
||||
Table as ArrowTable,
|
||||
Field,
|
||||
@@ -21,6 +27,8 @@ import {
|
||||
Int64,
|
||||
List,
|
||||
Schema,
|
||||
SchemaLike,
|
||||
Type,
|
||||
Uint8,
|
||||
Utf8,
|
||||
makeArrowTable,
|
||||
@@ -2019,3 +2027,52 @@ describe("column name options", () => {
|
||||
expect(results2.length).toBe(10);
|
||||
});
|
||||
});
|
||||
|
||||
// Exercises `Connection.createEmptyTable` with two kinds of schema input:
// a real arrow `Schema`, and a plain `SchemaLike` object whose field types
// are given as type-name strings. Both are expected to yield the same
// resulting column types.
describe("when creating an empty table", () => {
  let con: Connection;

  // Every test runs against a database in a fresh temporary directory.
  beforeEach(async () => {
    const { name: dbPath } = tmp.dirSync({ unsafeCleanup: true });
    con = await connect(dbPath);
  });

  afterEach(() => {
    con.close();
  });

  it("can create an empty table from an arrow Schema", async () => {
    const idField = new Field("id", new Int64());
    const vectorField = new Field("vector", new Float64());
    const created = await con.createEmptyTable(
      "test",
      new Schema([idField, vectorField]),
    );

    const { fields } = await created.schema();

    // "id" must come back as a 64-bit integer.
    expect(fields[0].type.typeId).toBe(Type.Int);
    expect((fields[0].type as Int64).bitWidth).toBe(64);
    // "vector" must come back as a float with precision 2
    // (presumably arrow's double precision -- confirm against the arrow docs).
    expect(fields[1].type.typeId).toBe(Type.Float);
    expect((fields[1].type as Float64).precision).toBe(2);
  });

  it("can create an empty table from schema that specifies field types by name", async () => {
    // No arrow type objects are instantiated here: each field type is a
    // plain string.
    const idField = { name: "id", type: "int64", nullable: true };
    const vectorField = { name: "vector", type: "float64", nullable: true };
    const schemaLike = {
      fields: [idField, vectorField],
      metadata: new Map(),
      names: ["id", "vector"],
    } satisfies SchemaLike;

    const created = await con.createEmptyTable("test", schemaLike);
    const { fields } = await created.schema();

    // Same expectations as for the arrow `Schema` case above.
    expect(fields[0].type.typeId).toBe(Type.Int);
    expect((fields[0].type as Int64).bitWidth).toBe(64);
    expect(fields[1].type.typeId).toBe(Type.Float);
    expect((fields[1].type as Float64).precision).toBe(2);
  });
});
|
||||
|
||||
Reference in New Issue
Block a user