mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-22 21:09:58 +00:00
fix: undefined values should become null in nullable fields (#2658)
### Bug Fix: Undefined Values in Nullable Fields **Issue**: When inserting data with `undefined` values into nullable fields, LanceDB was incorrectly coercing them to default values (`false` for booleans, `NaN` for numbers, `""` for strings) instead of `null`. **Fix**: Modified the `makeVector()` function in `arrow.ts` to properly convert `undefined` values to `null` for nullable fields before passing data to Apache Arrow. fixes: #2645 **Result**: Now `{ text: undefined, number: undefined, bool: undefined }` correctly becomes `{ text: null, number: null, bool: null }` when fields are marked as nullable in the schema. **Files Changed**: - `nodejs/lancedb/arrow.ts` (core fix) - `nodejs/__test__/arrow.test.ts` (test coverage) - This ensures proper null handling for nullable fields as expected by users. --------- Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
@@ -1039,3 +1039,33 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
// Test for the undefined values bug fix
|
||||
describe("undefined values handling", () => {
|
||||
it("should handle mixed undefined and actual values", () => {
|
||||
const schema = new Schema([
|
||||
new Field("text", new Utf8(), true), // nullable
|
||||
new Field("number", new Int32(), true), // nullable
|
||||
new Field("bool", new Bool(), true), // nullable
|
||||
]);
|
||||
|
||||
const data = [
|
||||
{ text: undefined, number: 42, bool: true },
|
||||
{ text: "hello", number: undefined, bool: false },
|
||||
{ text: "world", number: 123, bool: undefined },
|
||||
];
|
||||
const table = makeArrowTable(data, { schema });
|
||||
|
||||
const result = table.toArray();
|
||||
expect(result).toHaveLength(3);
|
||||
expect(result[0].text).toBe(null);
|
||||
expect(result[0].number).toBe(42);
|
||||
expect(result[0].bool).toBe(true);
|
||||
expect(result[1].text).toBe("hello");
|
||||
expect(result[1].number).toBe(null);
|
||||
expect(result[1].bool).toBe(false);
|
||||
expect(result[2].text).toBe("world");
|
||||
expect(result[2].number).toBe(123);
|
||||
expect(result[2].bool).toBe(null);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -705,7 +705,7 @@ function transposeData(
|
||||
}
|
||||
return current;
|
||||
});
|
||||
return makeVector(values, field.type);
|
||||
return makeVector(values, field.type, undefined, field.nullable);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -752,9 +752,15 @@ function makeVector(
|
||||
values: unknown[],
|
||||
type?: DataType,
|
||||
stringAsDictionary?: boolean,
|
||||
nullable?: boolean,
|
||||
// biome-ignore lint/suspicious/noExplicitAny: skip
|
||||
): Vector<any> {
|
||||
if (type !== undefined) {
|
||||
// Convert undefined values to null for nullable fields
|
||||
if (nullable) {
|
||||
values = values.map((v) => (v === undefined ? null : v));
|
||||
}
|
||||
|
||||
// workaround for: https://github.com/apache/arrow-js/issues/68
|
||||
if (DataType.isBool(type)) {
|
||||
const hasNonNullValue = values.some((v) => v !== null && v !== undefined);
|
||||
@@ -769,6 +775,7 @@ function makeVector(
|
||||
return arrowMakeVector(data);
|
||||
}
|
||||
}
|
||||
|
||||
// No need for inference, let Arrow create it
|
||||
if (type instanceof Int) {
|
||||
if (DataType.isInt(type) && type.bitWidth === 64) {
|
||||
@@ -893,7 +900,12 @@ async function applyEmbeddingsFromMetadata(
|
||||
for (const field of schema.fields) {
|
||||
if (!(field.name in columns)) {
|
||||
const nullValues = new Array(table.numRows).fill(null);
|
||||
columns[field.name] = makeVector(nullValues, field.type);
|
||||
columns[field.name] = makeVector(
|
||||
nullValues,
|
||||
field.type,
|
||||
undefined,
|
||||
field.nullable,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -957,7 +969,12 @@ async function applyEmbeddings<T>(
|
||||
} else if (schema != null) {
|
||||
const destField = schema.fields.find((f) => f.name === destColumn);
|
||||
if (destField != null) {
|
||||
newColumns[destColumn] = makeVector([], destField.type);
|
||||
newColumns[destColumn] = makeVector(
|
||||
[],
|
||||
destField.type,
|
||||
undefined,
|
||||
destField.nullable,
|
||||
);
|
||||
} else {
|
||||
throw new Error(
|
||||
`Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`,
|
||||
|
||||
Reference in New Issue
Block a user