feat: reconfigure typescript linter / formatter for nodejs (#1042)

The eslint rules specify some formatting requirements that are rather
strict and conflict with vscode's default formatter. I was unable to get
auto-formatting set up correctly. Also, eslint has quite recently
[given up on
formatting](https://eslint.org/blog/2023/10/deprecating-formatting-rules/)
and recommends using a 3rd party formatter.

This PR adds prettier as the formatter. It restores the eslint rules to
their defaults. This does mean we now have the "no explicit any" check
back on. I know that rule is pedantic but it did help me catch a few
corner cases in type testing that weren't covered in the current code.
Leaving in draft as this is dependent on other PRs.
This commit is contained in:
Weston Pace
2024-03-04 10:49:08 -08:00
parent 8033a44d68
commit 785ecfa037
28 changed files with 1048 additions and 691 deletions

View File

@@ -24,27 +24,6 @@ env:
RUST_BACKTRACE: "1"
jobs:
lint:
name: Lint
runs-on: ubuntu-22.04
defaults:
run:
shell: bash
working-directory: node
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v3
with:
node-version: 20
cache: 'npm'
cache-dependency-path: node/package-lock.json
- name: Lint
run: |
npm ci
npm run lint
linux:
name: Linux (Node ${{ matrix.node-version }})
timeout-minutes: 30

View File

@@ -22,7 +22,6 @@ env:
jobs:
lint:
if: false
name: Lint
runs-on: ubuntu-22.04
defaults:
@@ -50,6 +49,7 @@ jobs:
cargo clippy --all --all-features -- -D warnings
npm ci
npm run lint
npm run chkformat
linux:
name: Linux (NodeJS ${{ matrix.node-version }})
timeout-minutes: 30

View File

@@ -10,3 +10,8 @@ repos:
rev: v0.2.2
hooks:
- id: ruff
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.1.0
hooks:
- id: prettier
files: "nodejs/.*"

3
nodejs/.eslintignore Normal file
View File

@@ -0,0 +1,3 @@
**/dist/**/*
**/native.js
**/native.d.ts

View File

@@ -1,22 +0,0 @@
module.exports = {
env: {
browser: true,
es2021: true,
},
extends: [
"eslint:recommended",
"plugin:@typescript-eslint/recommended-type-checked",
"plugin:@typescript-eslint/stylistic-type-checked",
],
overrides: [],
parserOptions: {
project: "./tsconfig.json",
ecmaVersion: "latest",
sourceType: "module",
},
rules: {
"@typescript-eslint/method-signature-style": "off",
"@typescript-eslint/no-explicit-any": "off",
},
ignorePatterns: ["node_modules/", "dist/", "build/", "lancedb/native.*"],
};

1
nodejs/.prettierignore Symbolic link
View File

@@ -0,0 +1 @@
.eslintignore

View File

@@ -2,7 +2,6 @@
It will replace the NodeJS SDK when it is ready.
## Development
```sh
@@ -10,9 +9,35 @@ npm run build
npm t
```
Generating docs
### Running lint / format
LanceDB uses eslint for linting. VSCode does not need any plugins to use eslint. However, it
may need some additional configuration. Make sure that `eslint.experimental.useFlatConfig` is
set to true. Also, if your VSCode root folder is the repo root then you will need to set
`eslint.workingDirectories` to `["nodejs"]`. To manually lint your code you can run:
```sh
npm run lint
```
LanceDB uses prettier for formatting. If you are using VSCode you will need to install the
"Prettier - Code formatter" extension. You should then configure it to be the default formatter
for TypeScript and you should enable format on save. To manually check your code's format you
can run:
```sh
npm run chkformat
```
If you need to manually format your code you can run:
```sh
npx prettier --write .
```
### Generating docs
```sh
npm run docs
cd ../docs

View File

@@ -12,7 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
import { convertToTable, fromTableToBuffer, makeArrowTable, makeEmptyTable } from '../dist/arrow'
import {
convertToTable,
fromTableToBuffer,
makeArrowTable,
makeEmptyTable,
} from "../dist/arrow";
import {
Field,
FixedSizeList,
@@ -32,291 +37,368 @@ import {
Dictionary,
Int64,
Float,
Precision
} from 'apache-arrow'
import { type EmbeddingFunction } from '../dist/embedding/embedding_function'
Precision,
} from "apache-arrow";
import { type EmbeddingFunction } from "../dist/embedding/embedding_function";
function sampleRecords (): Array<Record<string, any>> {
function sampleRecords(): Array<Record<string, any>> {
return [
{
binary: Buffer.alloc(5),
boolean: false,
number: 7,
string: 'hello',
string: "hello",
struct: { x: 0, y: 0 },
list: ['anime', 'action', 'comedy']
}
]
list: ["anime", "action", "comedy"],
},
];
}
// Helper method to verify various ways to create a table
async function checkTableCreation (tableCreationMethod: (records: any, recordsReversed: any, schema: Schema) => Promise<Table>, infersTypes: boolean): Promise<void> {
const records = sampleRecords()
const recordsReversed = [{
list: ['anime', 'action', 'comedy'],
struct: { x: 0, y: 0 },
string: 'hello',
number: 7,
boolean: false,
binary: Buffer.alloc(5)
}]
async function checkTableCreation(
tableCreationMethod: (
records: any,
recordsReversed: any,
schema: Schema,
) => Promise<Table>,
infersTypes: boolean,
): Promise<void> {
const records = sampleRecords();
const recordsReversed = [
{
list: ["anime", "action", "comedy"],
struct: { x: 0, y: 0 },
string: "hello",
number: 7,
boolean: false,
binary: Buffer.alloc(5),
},
];
const schema = new Schema([
new Field('binary', new Binary(), false),
new Field('boolean', new Bool(), false),
new Field('number', new Float64(), false),
new Field('string', new Utf8(), false),
new Field('struct', new Struct([
new Field('x', new Float64(), false),
new Field('y', new Float64(), false)
])),
new Field('list', new List(new Field('item', new Utf8(), false)), false)
])
new Field("binary", new Binary(), false),
new Field("boolean", new Bool(), false),
new Field("number", new Float64(), false),
new Field("string", new Utf8(), false),
new Field(
"struct",
new Struct([
new Field("x", new Float64(), false),
new Field("y", new Float64(), false),
]),
),
new Field("list", new List(new Field("item", new Utf8(), false)), false),
]);
const table = await tableCreationMethod(records, recordsReversed, schema)
const table = await tableCreationMethod(records, recordsReversed, schema);
schema.fields.forEach((field, idx) => {
const actualField = table.schema.fields[idx]
const actualField = table.schema.fields[idx];
// Type inference always assumes nullable=true
if (infersTypes) {
expect(actualField.nullable).toBe(true)
expect(actualField.nullable).toBe(true);
} else {
expect(actualField.nullable).toBe(false)
expect(actualField.nullable).toBe(false);
}
expect(table.getChild(field.name)?.type.toString()).toEqual(field.type.toString())
expect(table.getChildAt(idx)?.type.toString()).toEqual(field.type.toString())
})
expect(table.getChild(field.name)?.type.toString()).toEqual(
field.type.toString(),
);
expect(table.getChildAt(idx)?.type.toString()).toEqual(
field.type.toString(),
);
});
}
describe('The function makeArrowTable', function () {
it('will use data types from a provided schema instead of inference', async function () {
describe("The function makeArrowTable", function () {
it("will use data types from a provided schema instead of inference", async function () {
const schema = new Schema([
new Field('a', new Int32()),
new Field('b', new Float32()),
new Field('c', new FixedSizeList(3, new Field('item', new Float16()))),
new Field('d', new Int64())
])
new Field("a", new Int32()),
new Field("b", new Float32()),
new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
new Field("d", new Int64()),
]);
const table = makeArrowTable(
[
{ a: 1, b: 2, c: [1, 2, 3], d: 9 },
{ a: 4, b: 5, c: [4, 5, 6], d: 10 },
{ a: 7, b: 8, c: [7, 8, 9], d: null }
{ a: 7, b: 8, c: [7, 8, 9], d: null },
],
{ schema }
)
{ schema },
);
const buf = await fromTableToBuffer(table)
expect(buf.byteLength).toBeGreaterThan(0)
const buf = await fromTableToBuffer(table);
expect(buf.byteLength).toBeGreaterThan(0);
const actual = tableFromIPC(buf)
expect(actual.numRows).toBe(3)
const actualSchema = actual.schema
expect(actualSchema).toEqual(schema)
})
const actual = tableFromIPC(buf);
expect(actual.numRows).toBe(3);
const actualSchema = actual.schema;
expect(actualSchema).toEqual(schema);
});
it('will assume the column `vector` is FixedSizeList<Float32> by default', async function () {
it("will assume the column `vector` is FixedSizeList<Float32> by default", async function () {
const schema = new Schema([
new Field('a', new Float(Precision.DOUBLE), true),
new Field('b', new Float(Precision.DOUBLE), true),
new Field("a", new Float(Precision.DOUBLE), true),
new Field("b", new Float(Precision.DOUBLE), true),
new Field(
'vector',
new FixedSizeList(3, new Field('item', new Float(Precision.SINGLE), true)),
true
)
])
"vector",
new FixedSizeList(
3,
new Field("item", new Float(Precision.SINGLE), true),
),
true,
),
]);
const table = makeArrowTable([
{ a: 1, b: 2, vector: [1, 2, 3] },
{ a: 4, b: 5, vector: [4, 5, 6] },
{ a: 7, b: 8, vector: [7, 8, 9] }
])
{ a: 7, b: 8, vector: [7, 8, 9] },
]);
const buf = await fromTableToBuffer(table)
expect(buf.byteLength).toBeGreaterThan(0)
const buf = await fromTableToBuffer(table);
expect(buf.byteLength).toBeGreaterThan(0);
const actual = tableFromIPC(buf)
expect(actual.numRows).toBe(3)
const actualSchema = actual.schema
expect(actualSchema).toEqual(schema)
})
const actual = tableFromIPC(buf);
expect(actual.numRows).toBe(3);
const actualSchema = actual.schema;
expect(actualSchema).toEqual(schema);
});
it('can support multiple vector columns', async function () {
it("can support multiple vector columns", async function () {
const schema = new Schema([
new Field('a', new Float(Precision.DOUBLE), true),
new Field('b', new Float(Precision.DOUBLE), true),
new Field('vec1', new FixedSizeList(3, new Field('item', new Float16(), true)), true),
new Field('vec2', new FixedSizeList(3, new Field('item', new Float16(), true)), true)
])
new Field("a", new Float(Precision.DOUBLE), true),
new Field("b", new Float(Precision.DOUBLE), true),
new Field(
"vec1",
new FixedSizeList(3, new Field("item", new Float16(), true)),
true,
),
new Field(
"vec2",
new FixedSizeList(3, new Field("item", new Float16(), true)),
true,
),
]);
const table = makeArrowTable(
[
{ a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
{ a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
{ a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
{ a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] },
],
{
vectorColumns: {
vec1: { type: new Float16() },
vec2: { type: new Float16() }
}
}
)
vec2: { type: new Float16() },
},
},
);
const buf = await fromTableToBuffer(table)
expect(buf.byteLength).toBeGreaterThan(0)
const buf = await fromTableToBuffer(table);
expect(buf.byteLength).toBeGreaterThan(0);
const actual = tableFromIPC(buf)
expect(actual.numRows).toBe(3)
const actualSchema = actual.schema
expect(actualSchema).toEqual(schema)
})
const actual = tableFromIPC(buf);
expect(actual.numRows).toBe(3);
const actualSchema = actual.schema;
expect(actualSchema).toEqual(schema);
});
it('will allow different vector column types', async function () {
const table = makeArrowTable(
[
{ fp16: [1], fp32: [1], fp64: [1] }
],
{
vectorColumns: {
fp16: { type: new Float16() },
fp32: { type: new Float32() },
fp64: { type: new Float64() }
}
}
)
it("will allow different vector column types", async function () {
const table = makeArrowTable([{ fp16: [1], fp32: [1], fp64: [1] }], {
vectorColumns: {
fp16: { type: new Float16() },
fp32: { type: new Float32() },
fp64: { type: new Float64() },
},
});
expect(table.getChild('fp16')?.type.children[0].type.toString()).toEqual(new Float16().toString())
expect(table.getChild('fp32')?.type.children[0].type.toString()).toEqual(new Float32().toString())
expect(table.getChild('fp64')?.type.children[0].type.toString()).toEqual(new Float64().toString())
})
expect(table.getChild("fp16")?.type.children[0].type.toString()).toEqual(
new Float16().toString(),
);
expect(table.getChild("fp32")?.type.children[0].type.toString()).toEqual(
new Float32().toString(),
);
expect(table.getChild("fp64")?.type.children[0].type.toString()).toEqual(
new Float64().toString(),
);
});
it('will use dictionary encoded strings if asked', async function () {
const table = makeArrowTable([{ str: 'hello' }])
expect(DataType.isUtf8(table.getChild('str')?.type)).toBe(true)
it("will use dictionary encoded strings if asked", async function () {
const table = makeArrowTable([{ str: "hello" }]);
expect(DataType.isUtf8(table.getChild("str")?.type)).toBe(true);
const tableWithDict = makeArrowTable([{ str: 'hello' }], { dictionaryEncodeStrings: true })
expect(DataType.isDictionary(tableWithDict.getChild('str')?.type)).toBe(true)
const tableWithDict = makeArrowTable([{ str: "hello" }], {
dictionaryEncodeStrings: true,
});
expect(DataType.isDictionary(tableWithDict.getChild("str")?.type)).toBe(
true,
);
const schema = new Schema([
new Field('str', new Dictionary(new Utf8(), new Int32()))
])
new Field("str", new Dictionary(new Utf8(), new Int32())),
]);
const tableWithDict2 = makeArrowTable([{ str: 'hello' }], { schema })
expect(DataType.isDictionary(tableWithDict2.getChild('str')?.type)).toBe(true)
})
const tableWithDict2 = makeArrowTable([{ str: "hello" }], { schema });
expect(DataType.isDictionary(tableWithDict2.getChild("str")?.type)).toBe(
true,
);
});
it('will infer data types correctly', async function () {
await checkTableCreation(async (records) => makeArrowTable(records), true)
})
it("will infer data types correctly", async function () {
await checkTableCreation(async (records) => makeArrowTable(records), true);
});
it('will allow a schema to be provided', async function () {
await checkTableCreation(async (records, _, schema) => makeArrowTable(records, { schema }), false)
})
it("will allow a schema to be provided", async function () {
await checkTableCreation(
async (records, _, schema) => makeArrowTable(records, { schema }),
false,
);
});
it('will use the field order of any provided schema', async function () {
await checkTableCreation(async (_, recordsReversed, schema) => makeArrowTable(recordsReversed, { schema }), false)
})
it("will use the field order of any provided schema", async function () {
await checkTableCreation(
async (_, recordsReversed, schema) =>
makeArrowTable(recordsReversed, { schema }),
false,
);
});
it('will make an empty table', async function () {
await checkTableCreation(async (_, __, schema) => makeArrowTable([], { schema }), false)
})
})
it("will make an empty table", async function () {
await checkTableCreation(
async (_, __, schema) => makeArrowTable([], { schema }),
false,
);
});
});
class DummyEmbedding implements EmbeddingFunction<string> {
public readonly sourceColumn = 'string'
public readonly embeddingDimension = 2
public readonly embeddingDataType = new Float16()
public readonly sourceColumn = "string";
public readonly embeddingDimension = 2;
public readonly embeddingDataType = new Float16();
async embed (data: string[]): Promise<number[][]> {
return data.map(
() => [0.0, 0.0]
)
async embed(data: string[]): Promise<number[][]> {
return data.map(() => [0.0, 0.0]);
}
}
class DummyEmbeddingWithNoDimension implements EmbeddingFunction<string> {
public readonly sourceColumn = 'string'
public readonly sourceColumn = "string";
async embed (data: string[]): Promise<number[][]> {
return data.map(
() => [0.0, 0.0]
)
async embed(data: string[]): Promise<number[][]> {
return data.map(() => [0.0, 0.0]);
}
}
describe('convertToTable', function () {
it('will infer data types correctly', async function () {
await checkTableCreation(async (records) => await convertToTable(records), true)
})
describe("convertToTable", function () {
it("will infer data types correctly", async function () {
await checkTableCreation(
async (records) => await convertToTable(records),
true,
);
});
it('will allow a schema to be provided', async function () {
await checkTableCreation(async (records, _, schema) => await convertToTable(records, undefined, { schema }), false)
})
it("will allow a schema to be provided", async function () {
await checkTableCreation(
async (records, _, schema) =>
await convertToTable(records, undefined, { schema }),
false,
);
});
it('will use the field order of any provided schema', async function () {
await checkTableCreation(async (_, recordsReversed, schema) => await convertToTable(recordsReversed, undefined, { schema }), false)
})
it("will use the field order of any provided schema", async function () {
await checkTableCreation(
async (_, recordsReversed, schema) =>
await convertToTable(recordsReversed, undefined, { schema }),
false,
);
});
it('will make an empty table', async function () {
await checkTableCreation(async (_, __, schema) => await convertToTable([], undefined, { schema }), false)
})
it("will make an empty table", async function () {
await checkTableCreation(
async (_, __, schema) => await convertToTable([], undefined, { schema }),
false,
);
});
it('will apply embeddings', async function () {
const records = sampleRecords()
const table = await convertToTable(records, new DummyEmbedding())
expect(DataType.isFixedSizeList(table.getChild('vector')?.type)).toBe(true)
expect(table.getChild('vector')?.type.children[0].type.toString()).toEqual(new Float16().toString())
})
it("will apply embeddings", async function () {
const records = sampleRecords();
const table = await convertToTable(records, new DummyEmbedding());
expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe(true);
expect(table.getChild("vector")?.type.children[0].type.toString()).toEqual(
new Float16().toString(),
);
});
it('will fail if missing the embedding source column', async function () {
await expect(convertToTable([{ id: 1 }], new DummyEmbedding())).rejects.toThrow("'string' was not present")
})
it("will fail if missing the embedding source column", async function () {
await expect(
convertToTable([{ id: 1 }], new DummyEmbedding()),
).rejects.toThrow("'string' was not present");
});
it('use embeddingDimension if embedding missing from table', async function () {
const schema = new Schema([
new Field('string', new Utf8(), false)
])
it("use embeddingDimension if embedding missing from table", async function () {
const schema = new Schema([new Field("string", new Utf8(), false)]);
// Simulate getting an empty Arrow table (minus embedding) from some other source
// In other words, we aren't starting with records
const table = makeEmptyTable(schema)
const table = makeEmptyTable(schema);
// If the embedding specifies the dimension we are fine
await fromTableToBuffer(table, new DummyEmbedding())
await fromTableToBuffer(table, new DummyEmbedding());
// We can also supply a schema and should be ok
const schemaWithEmbedding = new Schema([
new Field('string', new Utf8(), false),
new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
])
await fromTableToBuffer(table, new DummyEmbeddingWithNoDimension(), schemaWithEmbedding)
new Field("string", new Utf8(), false),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float16(), false)),
false,
),
]);
await fromTableToBuffer(
table,
new DummyEmbeddingWithNoDimension(),
schemaWithEmbedding,
);
// Otherwise we will get an error
await expect(fromTableToBuffer(table, new DummyEmbeddingWithNoDimension())).rejects.toThrow('does not specify `embeddingDimension`')
})
await expect(
fromTableToBuffer(table, new DummyEmbeddingWithNoDimension()),
).rejects.toThrow("does not specify `embeddingDimension`");
});
it('will apply embeddings to an empty table', async function () {
it("will apply embeddings to an empty table", async function () {
const schema = new Schema([
new Field('string', new Utf8(), false),
new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
])
const table = await convertToTable([], new DummyEmbedding(), { schema })
expect(DataType.isFixedSizeList(table.getChild('vector')?.type)).toBe(true)
expect(table.getChild('vector')?.type.children[0].type.toString()).toEqual(new Float16().toString())
})
new Field("string", new Utf8(), false),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float16(), false)),
false,
),
]);
const table = await convertToTable([], new DummyEmbedding(), { schema });
expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe(true);
expect(table.getChild("vector")?.type.children[0].type.toString()).toEqual(
new Float16().toString(),
);
});
it('will complain if embeddings present but schema missing embedding column', async function () {
const schema = new Schema([
new Field('string', new Utf8(), false)
])
await expect(convertToTable([], new DummyEmbedding(), { schema })).rejects.toThrow('column vector was missing')
})
it("will complain if embeddings present but schema missing embedding column", async function () {
const schema = new Schema([new Field("string", new Utf8(), false)]);
await expect(
convertToTable([], new DummyEmbedding(), { schema }),
).rejects.toThrow("column vector was missing");
});
it('will provide a nice error if run twice', async function () {
const records = sampleRecords()
const table = await convertToTable(records, new DummyEmbedding())
it("will provide a nice error if run twice", async function () {
const records = sampleRecords();
const table = await convertToTable(records, new DummyEmbedding());
// fromTableToBuffer will try and apply the embeddings again
await expect(fromTableToBuffer(table, new DummyEmbedding())).rejects.toThrow('already existed')
})
})
await expect(
fromTableToBuffer(table, new DummyEmbedding()),
).rejects.toThrow("already existed");
});
});
describe('makeEmptyTable', function () {
it('will make an empty table', async function () {
await checkTableCreation(async (_, __, schema) => makeEmptyTable(schema), false)
})
})
describe("makeEmptyTable", function () {
it("will make an empty table", async function () {
await checkTableCreation(
async (_, __, schema) => makeEmptyTable(schema),
false,
);
});
});

View File

@@ -17,55 +17,58 @@ import * as tmp from "tmp";
import { Connection, connect } from "../dist/index.js";
describe("when connecting", () => {
let tmpDir: tmp.DirResult;
beforeEach(() => tmpDir = tmp.dirSync({ unsafeCleanup: true }));
beforeEach(() => (tmpDir = tmp.dirSync({ unsafeCleanup: true })));
afterEach(() => tmpDir.removeCallback());
it("should connect", async() => {
it("should connect", async () => {
const db = await connect(tmpDir.name);
expect(db.display()).toBe(`NativeDatabase(uri=${tmpDir.name}, read_consistency_interval=None)`);
})
expect(db.display()).toBe(
`NativeDatabase(uri=${tmpDir.name}, read_consistency_interval=None)`,
);
});
it("should allow read consistency interval to be specified", async() => {
const db = await connect(tmpDir.name, { readConsistencyInterval: 5});
expect(db.display()).toBe(`NativeDatabase(uri=${tmpDir.name}, read_consistency_interval=5s)`);
})
it("should allow read consistency interval to be specified", async () => {
const db = await connect(tmpDir.name, { readConsistencyInterval: 5 });
expect(db.display()).toBe(
`NativeDatabase(uri=${tmpDir.name}, read_consistency_interval=5s)`,
);
});
});
describe("given a connection", () => {
let tmpDir: tmp.DirResult
let db: Connection
let tmpDir: tmp.DirResult;
let db: Connection;
beforeEach(async () => {
tmpDir = tmp.dirSync({ unsafeCleanup: true });
db = await connect(tmpDir.name)
db = await connect(tmpDir.name);
});
afterEach(() => tmpDir.removeCallback());
it("should raise an error if opening a non-existent table", async() => {
it("should raise an error if opening a non-existent table", async () => {
await expect(db.openTable("non-existent")).rejects.toThrow("was not found");
})
});
it("should raise an error if any operation is tried after it is closed", async() => {
it("should raise an error if any operation is tried after it is closed", async () => {
expect(db.isOpen()).toBe(true);
await db.close();
expect(db.isOpen()).toBe(false);
await expect(db.tableNames()).rejects.toThrow("Connection is closed");
})
});
it("should fail if creating table twice, unless overwrite is true", async() => {
it("should fail if creating table twice, unless overwrite is true", async () => {
let tbl = await db.createTable("test", [{ id: 1 }, { id: 2 }]);
await expect(tbl.countRows()).resolves.toBe(2);
await expect(db.createTable("test", [{ id: 1 }, { id: 2 }])).rejects.toThrow();
await expect(
db.createTable("test", [{ id: 1 }, { id: 2 }]),
).rejects.toThrow();
tbl = await db.createTable("test", [{ id: 3 }], { mode: "overwrite" });
await expect(tbl.countRows()).resolves.toBe(1);
})
});
it("should list tables", async() => {
it("should list tables", async () => {
await db.createTable("test2", [{ id: 1 }, { id: 2 }]);
await db.createTable("test1", [{ id: 1 }, { id: 2 }]);
expect(await db.tableNames()).toEqual(["test1", "test2"]);
})
});
});

View File

@@ -17,15 +17,21 @@ import * as path from "path";
import * as tmp from "tmp";
import { Table, connect } from "../dist";
import { Schema, Field, Float32, Int32, FixedSizeList, Int64, Float64 } from "apache-arrow";
import {
Schema,
Field,
Float32,
Int32,
FixedSizeList,
Int64,
Float64,
} from "apache-arrow";
import { makeArrowTable } from "../dist/arrow";
describe("Given a table", () => {
let tmpDir: tmp.DirResult;
let table: Table;
const schema = new Schema([
new Field("id", new Float64(), true),
]);
const schema = new Schema([new Field("id", new Float64(), true)]);
beforeEach(async () => {
tmpDir = tmp.dirSync({ unsafeCleanup: true });
const conn = await connect(tmpDir.name);
@@ -34,31 +40,32 @@ describe("Given a table", () => {
afterEach(() => tmpDir.removeCallback());
it("be displayable", async () => {
expect(table.display()).toMatch(/NativeTable\(some_table, uri=.*, read_consistency_interval=None\)/);
table.close()
expect(table.display()).toBe("ClosedTable(some_table)")
})
expect(table.display()).toMatch(
/NativeTable\(some_table, uri=.*, read_consistency_interval=None\)/,
);
table.close();
expect(table.display()).toBe("ClosedTable(some_table)");
});
it("should let me add data", async () => {
await table.add([{ id: 1 }, { id: 2 }]);
await table.add([{ id: 1 }]);
await expect(table.countRows()).resolves.toBe(3);
})
});
it("should overwrite data if asked", async () => {
await table.add([{ id: 1 }, { id: 2 }]);
await table.add([{ id: 1 }], { mode: "overwrite" });
await expect(table.countRows()).resolves.toBe(1);
})
});
it("should let me close the table", async () => {
expect(table.isOpen()).toBe(true);
table.close();
expect(table.isOpen()).toBe(false);
expect(table.countRows()).rejects.toThrow("Table some_table is closed");
})
})
});
});
describe("Test creating index", () => {
let tmpDir: tmp.DirResult;
@@ -85,7 +92,7 @@ describe("Test creating index", () => {
})),
{
schema,
}
},
);
const tbl = await db.createTable("test", data);
await tbl.createIndex().build();
@@ -113,10 +120,10 @@ describe("Test creating index", () => {
makeArrowTable([
{ id: 1, val: 2 },
{ id: 2, val: 3 },
])
]),
);
await expect(tbl.createIndex().build()).rejects.toThrow(
"No vector column found"
"No vector column found",
);
await tbl.createIndex("val").build();
@@ -135,7 +142,7 @@ describe("Test creating index", () => {
new Field("vec", new FixedSizeList(32, new Field("item", new Float32()))),
new Field(
"vec2",
new FixedSizeList(64, new Field("item", new Float32()))
new FixedSizeList(64, new Field("item", new Float32())),
),
]);
const tbl = await db.createTable(
@@ -152,13 +159,13 @@ describe("Test creating index", () => {
.fill(1)
.map(() => Math.random()),
})),
{ schema }
)
{ schema },
),
);
// Only build index over `vec`
await expect(tbl.createIndex().build()).rejects.toThrow(
/.*More than one vector columns found.*/
/.*More than one vector columns found.*/,
);
tbl
.createIndex("vec")
@@ -170,7 +177,7 @@ describe("Test creating index", () => {
.nearestTo(
Array(32)
.fill(1)
.map(() => Math.random())
.map(() => Math.random()),
)
.limit(2)
.toArrow();
@@ -183,10 +190,10 @@ describe("Test creating index", () => {
Array(64)
.fill(1)
.map(() => Math.random()),
"vec"
"vec",
)
.limit(2)
.toArrow()
.toArrow(),
).rejects.toThrow(/.*does not match the dimension.*/);
const query64 = Array(64)
@@ -211,7 +218,7 @@ describe("Test creating index", () => {
})),
{
schema,
}
},
);
const tbl = await db.createTable("test", data);
await tbl.createIndex("id").build();
@@ -224,7 +231,6 @@ describe("Test creating index", () => {
});
describe("Read consistency interval", () => {
let tmpDir: tmp.DirResult;
beforeEach(() => {
tmpDir = tmp.dirSync({ unsafeCleanup: true });
@@ -237,7 +243,9 @@ describe("Read consistency interval", () => {
const db = await connect(tmpDir.name);
const table = await db.createTable("my_table", [{ id: 1 }]);
const db2 = await connect(tmpDir.name, { readConsistencyInterval: interval });
const db2 = await connect(tmpDir.name, {
readConsistencyInterval: interval,
});
const table2 = await db2.openTable("my_table");
expect(await table2.countRows()).toEqual(await table.countRows());
@@ -253,77 +261,89 @@ describe("Read consistency interval", () => {
} else {
// interval == 0.1
expect(await table2.countRows()).toEqual(1);
await new Promise(r => setTimeout(r, 100));
await new Promise((r) => setTimeout(r, 100));
expect(await table2.countRows()).toEqual(2);
}
});
});
describe('schema evolution', function () {
describe("schema evolution", function () {
let tmpDir: tmp.DirResult;
beforeEach(() => {
tmpDir = tmp.dirSync({ unsafeCleanup: true });
});
afterEach(() => {
tmpDir.removeCallback();
})
});
// Create a new sample table
it('can add a new column to the schema', async function () {
const con = await connect(tmpDir.name)
const table = await con.createTable('vectors', [
{ id: 1n, vector: [0.1, 0.2] }
])
it("can add a new column to the schema", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
await table.addColumns([{ name: 'price', valueSql: 'cast(10.0 as float)' }])
await table.addColumns([
{ name: "price", valueSql: "cast(10.0 as float)" },
]);
const expectedSchema = new Schema([
new Field('id', new Int64(), true),
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true)), true),
new Field('price', new Float32(), false)
])
expect(await table.schema()).toEqual(expectedSchema)
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float32(), false),
]);
expect(await table.schema()).toEqual(expectedSchema);
});
it('can alter the columns in the schema', async function () {
const con = await connect(tmpDir.name)
it("can alter the columns in the schema", async function () {
const con = await connect(tmpDir.name);
const schema = new Schema([
new Field('id', new Int64(), true),
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true)), true),
new Field('price', new Float64(), false)
])
const table = await con.createTable('vectors', [
{ id: 1n, vector: [0.1, 0.2] }
])
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), false),
]);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
// Can create a non-nullable column only through addColumns at the moment.
await table.addColumns([{ name: 'price', valueSql: 'cast(10.0 as double)' }])
expect(await table.schema()).toEqual(schema)
await table.addColumns([
{ name: "price", valueSql: "cast(10.0 as double)" },
]);
expect(await table.schema()).toEqual(schema);
await table.alterColumns([
{ path: 'id', rename: 'new_id' },
{ path: 'price', nullable: true }
])
{ path: "id", rename: "new_id" },
{ path: "price", nullable: true },
]);
const expectedSchema = new Schema([
new Field('new_id', new Int64(), true),
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true)), true),
new Field('price', new Float64(), true)
])
expect(await table.schema()).toEqual(expectedSchema)
new Field("new_id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
});
it('can drop a column from the schema', async function () {
const con = await connect(tmpDir.name)
const table = await con.createTable('vectors', [
{ id: 1n, vector: [0.1, 0.2] }
])
await table.dropColumns(['vector'])
it("can drop a column from the schema", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
await table.dropColumns(["vector"]);
const expectedSchema = new Schema([
new Field('id', new Int64(), true)
])
expect(await table.schema()).toEqual(expectedSchema)
const expectedSchema = new Schema([new Field("id", new Int64(), true)]);
expect(await table.schema()).toEqual(expectedSchema);
});
});

View File

@@ -1,15 +1,10 @@
{
"extends": "../tsconfig.json",
"compilerOptions": {
"outDir": "./dist/spec",
"module": "commonjs",
"target": "es2022",
"types": [
"jest",
"node"
]
},
"include": [
"**/*",
]
"extends": "../tsconfig.json",
"compilerOptions": {
"outDir": "./dist/spec",
"module": "commonjs",
"target": "es2022",
"types": ["jest", "node"]
},
"include": ["**/*"]
}

11
nodejs/eslint.config.js Normal file
View File

@@ -0,0 +1,11 @@
// @ts-check
const eslint = require("@eslint/js");
const tseslint = require("typescript-eslint");
const eslintConfigPrettier = require("eslint-config-prettier");
module.exports = tseslint.config(
eslint.configs.recommended,
eslintConfigPrettier,
...tseslint.configs.recommended,
);

View File

@@ -1,7 +1,7 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
preset: "ts-jest",
testEnvironment: "node",
moduleDirectories: ["node_modules", "./dist"],
moduleFileExtensions: ["js", "ts"],
};

View File

@@ -30,10 +30,9 @@ import {
type Float,
DataType,
Binary,
Float32
} from 'apache-arrow'
import { type EmbeddingFunction } from './embedding/embedding_function'
import { Table } from './native';
Float32,
} from "apache-arrow";
import { type EmbeddingFunction } from "./embedding/embedding_function";
/** Data type accepted by NodeJS SDK */
export type Data = Record<string, unknown>[] | ArrowTable;
@@ -43,10 +42,10 @@ export type Data = Record<string, unknown>[] | ArrowTable;
*/
export class VectorColumnOptions {
/** Vector column type. */
type: Float = new Float32()
type: Float = new Float32();
constructor (values?: Partial<VectorColumnOptions>) {
Object.assign(this, values)
constructor(values?: Partial<VectorColumnOptions>) {
Object.assign(this, values);
}
}
@@ -63,7 +62,7 @@ export class MakeArrowTableOptions {
* The schema must be specified if there are no records (e.g. to make
* an empty table)
*/
schema?: Schema
schema?: Schema;
/*
* Mapping from vector column name to expected type
@@ -82,8 +81,8 @@ export class MakeArrowTableOptions {
* vector column.
*/
vectorColumns: Record<string, VectorColumnOptions> = {
vector: new VectorColumnOptions()
}
vector: new VectorColumnOptions(),
};
/**
* If true then string columns will be encoded with dictionary encoding
@@ -94,10 +93,10 @@ export class MakeArrowTableOptions {
*
* If `schema` is provided then this property is ignored.
*/
dictionaryEncodeStrings: boolean = false
dictionaryEncodeStrings: boolean = false;
constructor (values?: Partial<MakeArrowTableOptions>) {
Object.assign(this, values)
constructor(values?: Partial<MakeArrowTableOptions>) {
Object.assign(this, values);
}
}
@@ -197,56 +196,83 @@ export class MakeArrowTableOptions {
* assert.deepEqual(table.schema, schema)
* ```
*/
export function makeArrowTable (
data: Array<Record<string, any>>,
options?: Partial<MakeArrowTableOptions>
export function makeArrowTable(
data: Array<Record<string, unknown>>,
options?: Partial<MakeArrowTableOptions>,
): ArrowTable {
if (data.length === 0 && (options?.schema === undefined || options?.schema === null)) {
throw new Error('At least one record or a schema needs to be provided')
if (
data.length === 0 &&
(options?.schema === undefined || options?.schema === null)
) {
throw new Error("At least one record or a schema needs to be provided");
}
const opt = new MakeArrowTableOptions(options !== undefined ? options : {})
const columns: Record<string, Vector> = {}
const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
const columns: Record<string, Vector> = {};
// TODO: sample dataset to find missing columns
// Prefer the field ordering of the schema, if present
const columnNames = ((options?.schema) != null) ? (options?.schema?.names as string[]) : Object.keys(data[0])
const columnNames =
options?.schema != null
? (options?.schema?.names as string[])
: Object.keys(data[0]);
for (const colName of columnNames) {
if (data.length !== 0 && !Object.prototype.hasOwnProperty.call(data[0], colName)) {
if (
data.length !== 0 &&
!Object.prototype.hasOwnProperty.call(data[0], colName)
) {
// The field is present in the schema, but not in the data, skip it
continue
continue;
}
// Extract a single column from the records (transpose from row-major to col-major)
let values = data.map((datum) => datum[colName])
let values = data.map((datum) => datum[colName]);
// By default (type === undefined) arrow will infer the type from the JS type
let type
let type;
if (opt.schema !== undefined) {
// If there is a schema provided, then use that for the type instead
type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type
type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type;
if (DataType.isInt(type) && type.bitWidth === 64) {
// wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
values = values.map((v) => {
if (v === null) {
return v
return v;
}
return BigInt(v)
})
if (typeof v === "bigint") {
return v;
}
if (typeof v === "number") {
return BigInt(v);
}
throw new Error(
`Expected BigInt or number for column ${colName}, got ${typeof v}`,
);
});
}
} else {
// Otherwise, check to see if this column is one of the vector columns
// defined by opt.vectorColumns and, if so, use the fixed size list type
const vectorColumnOptions = opt.vectorColumns[colName]
const vectorColumnOptions = opt.vectorColumns[colName];
if (vectorColumnOptions !== undefined) {
type = newVectorType(values[0].length, vectorColumnOptions.type)
const firstNonNullValue = values.find((v) => v !== null);
if (Array.isArray(firstNonNullValue)) {
type = newVectorType(
firstNonNullValue.length,
vectorColumnOptions.type,
);
} else {
throw new Error(
`Column ${colName} is expected to be a vector column but first non-null value is not an array. Could not determine size of vector column`,
);
}
}
}
try {
// Convert an Array of JS values to an arrow vector
columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings)
columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
} catch (error: unknown) {
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
throw Error(`Could not convert column "${colName}" to Arrow: ${error}`)
throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
}
}
@@ -261,94 +287,116 @@ export function makeArrowTable (
// To work around this we first create a table with the wrong schema and
// then patch the schema of the batches so we can use
// `new ArrowTable(schema, batches)` which does not do any schema inference
const firstTable = new ArrowTable(columns)
const firstTable = new ArrowTable(columns);
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const batchesFixed = firstTable.batches.map(batch => new RecordBatch(opt.schema!, batch.data))
return new ArrowTable(opt.schema, batchesFixed)
const batchesFixed = firstTable.batches.map(
(batch) => new RecordBatch(opt.schema!, batch.data),
);
return new ArrowTable(opt.schema, batchesFixed);
} else {
return new ArrowTable(columns)
return new ArrowTable(columns);
}
}
/**
* Create an empty Arrow table with the provided schema
*/
export function makeEmptyTable (schema: Schema): ArrowTable {
return makeArrowTable([], { schema })
export function makeEmptyTable(schema: Schema): ArrowTable {
return makeArrowTable([], { schema });
}
// Helper function to convert Array<Array<any>> to a variable sized list array
function makeListVector (lists: any[][]): Vector<any> {
// @ts-expect-error (Vector<unknown> is not assignable to Vector<any>)
function makeListVector(lists: unknown[][]): Vector<unknown> {
if (lists.length === 0 || lists[0].length === 0) {
throw Error('Cannot infer list vector from empty array or empty list')
throw Error("Cannot infer list vector from empty array or empty list");
}
const sampleList = lists[0]
let inferredType
const sampleList = lists[0];
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let inferredType: any;
try {
const sampleVector = makeVector(sampleList)
inferredType = sampleVector.type
const sampleVector = makeVector(sampleList);
inferredType = sampleVector.type;
} catch (error: unknown) {
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
throw Error(`Cannot infer list vector. Cannot infer inner type: ${error}`)
throw Error(`Cannot infer list vector. Cannot infer inner type: ${error}`);
}
const listBuilder = makeBuilder({
type: new List(new Field('item', inferredType, true))
})
type: new List(new Field("item", inferredType, true)),
});
for (const list of lists) {
listBuilder.append(list)
listBuilder.append(list);
}
return listBuilder.finish().toVector()
return listBuilder.finish().toVector();
}
// Helper function to convert an Array of JS values to an Arrow Vector
function makeVector (values: any[], type?: DataType, stringAsDictionary?: boolean): Vector<any> {
function makeVector(
values: unknown[],
type?: DataType,
stringAsDictionary?: boolean,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
): Vector<any> {
if (type !== undefined) {
// No need for inference, let Arrow create it
return vectorFromArray(values, type)
return vectorFromArray(values, type);
}
if (values.length === 0) {
throw Error('makeVector requires at least one value or the type must be specfied')
throw Error(
"makeVector requires at least one value or the type must be specfied",
);
}
const sampleValue = values.find(val => val !== null && val !== undefined)
const sampleValue = values.find((val) => val !== null && val !== undefined);
if (sampleValue === undefined) {
throw Error('makeVector cannot infer the type if all values are null or undefined')
throw Error(
"makeVector cannot infer the type if all values are null or undefined",
);
}
if (Array.isArray(sampleValue)) {
// Default Arrow inference doesn't handle list types
return makeListVector(values)
return makeListVector(values as unknown[][]);
} else if (Buffer.isBuffer(sampleValue)) {
// Default Arrow inference doesn't handle Buffer
return vectorFromArray(values, new Binary())
} else if (!(stringAsDictionary ?? false) && (typeof sampleValue === 'string' || sampleValue instanceof String)) {
return vectorFromArray(values, new Binary());
} else if (
!(stringAsDictionary ?? false) &&
(typeof sampleValue === "string" || sampleValue instanceof String)
) {
// If the type is string then don't use Arrow's default inference unless dictionaries are requested
// because it will always use dictionary encoding for strings
return vectorFromArray(values, new Utf8())
return vectorFromArray(values, new Utf8());
} else {
// Convert a JS array of values to an arrow vector
return vectorFromArray(values)
return vectorFromArray(values);
}
}
async function applyEmbeddings<T> (table: ArrowTable, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<ArrowTable> {
async function applyEmbeddings<T>(
table: ArrowTable,
embeddings?: EmbeddingFunction<T>,
schema?: Schema,
): Promise<ArrowTable> {
if (embeddings == null) {
return table
return table;
}
// Convert from ArrowTable to Record<String, Vector>
const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
const name = table.schema.fields[idx].name
const name = table.schema.fields[idx].name;
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const vec = table.getChildAt(idx)!
return [name, vec]
})
const newColumns = Object.fromEntries(colEntries)
const vec = table.getChildAt(idx)!;
return [name, vec];
});
const newColumns = Object.fromEntries(colEntries);
const sourceColumn = newColumns[embeddings.sourceColumn]
const destColumn = embeddings.destColumn ?? 'vector'
const innerDestType = embeddings.embeddingDataType ?? new Float32()
const sourceColumn = newColumns[embeddings.sourceColumn];
const destColumn = embeddings.destColumn ?? "vector";
const innerDestType = embeddings.embeddingDataType ?? new Float32();
if (sourceColumn === undefined) {
throw new Error(`Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`)
throw new Error(
`Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`,
);
}
if (table.numRows === 0) {
@@ -356,45 +404,60 @@ async function applyEmbeddings<T> (table: ArrowTable, embeddings?: EmbeddingFunc
// We have an empty table and it already has the embedding column so no work needs to be done
// Note: we don't return an error like we did below because this is a common occurrence. For example,
// if we call convertToTable with 0 records and a schema that includes the embedding
return table
return table;
}
if (embeddings.embeddingDimension !== undefined) {
const destType = newVectorType(embeddings.embeddingDimension, innerDestType)
newColumns[destColumn] = makeVector([], destType)
const destType = newVectorType(
embeddings.embeddingDimension,
innerDestType,
);
newColumns[destColumn] = makeVector([], destType);
} else if (schema != null) {
const destField = schema.fields.find(f => f.name === destColumn)
const destField = schema.fields.find((f) => f.name === destColumn);
if (destField != null) {
newColumns[destColumn] = makeVector([], destField.type)
newColumns[destColumn] = makeVector([], destField.type);
} else {
throw new Error(`Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`)
throw new Error(
`Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`,
);
}
} else {
throw new Error('Attempt to apply embeddings to an empty table when the embeddings function does not specify `embeddingDimension`')
throw new Error(
"Attempt to apply embeddings to an empty table when the embeddings function does not specify `embeddingDimension`",
);
}
} else {
if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
throw new Error(`Attempt to apply embeddings to table failed because column ${destColumn} already existed`)
throw new Error(
`Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
);
}
if (table.batches.length > 1) {
throw new Error('Internal error: `makeArrowTable` unexpectedly created a table with more than one batch')
throw new Error(
"Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
);
}
const values = sourceColumn.toArray()
const vectors = await embeddings.embed(values as T[])
const values = sourceColumn.toArray();
const vectors = await embeddings.embed(values as T[]);
if (vectors.length !== values.length) {
throw new Error('Embedding function did not return an embedding for each input element')
throw new Error(
"Embedding function did not return an embedding for each input element",
);
}
const destType = newVectorType(vectors[0].length, innerDestType)
newColumns[destColumn] = makeVector(vectors, destType)
const destType = newVectorType(vectors[0].length, innerDestType);
newColumns[destColumn] = makeVector(vectors, destType);
}
const newTable = new ArrowTable(newColumns)
const newTable = new ArrowTable(newColumns);
if (schema != null) {
if (schema.fields.find(f => f.name === destColumn) === undefined) {
throw new Error(`When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`)
if (schema.fields.find((f) => f.name === destColumn) === undefined) {
throw new Error(
`When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`,
);
}
return alignTable(newTable, schema)
return alignTable(newTable, schema);
}
return newTable
return newTable;
}
/*
@@ -415,21 +478,24 @@ async function applyEmbeddings<T> (table: ArrowTable, embeddings?: EmbeddingFunc
* embedding columns. If no schema is provided then embedding columns will
* be placed at the end of the table, after all of the input columns.
*/
export async function convertToTable<T> (
export async function convertToTable<T>(
data: Array<Record<string, unknown>>,
embeddings?: EmbeddingFunction<T>,
makeTableOptions?: Partial<MakeArrowTableOptions>
makeTableOptions?: Partial<MakeArrowTableOptions>,
): Promise<ArrowTable> {
const table = makeArrowTable(data, makeTableOptions)
return await applyEmbeddings(table, embeddings, makeTableOptions?.schema)
const table = makeArrowTable(data, makeTableOptions);
return await applyEmbeddings(table, embeddings, makeTableOptions?.schema);
}
// Creates the Arrow Type for a Vector column with dimension `dim`
function newVectorType <T extends Float> (dim: number, innerType: T): FixedSizeList<T> {
function newVectorType<T extends Float>(
dim: number,
innerType: T,
): FixedSizeList<T> {
// in Lance we always default to have the elements nullable, so we need to set it to true
// otherwise we often get schema mismatches because the stored data always has schema with nullable elements
const children = new Field<T>('item', innerType, true)
return new FixedSizeList(dim, children)
const children = new Field<T>("item", innerType, true);
return new FixedSizeList(dim, children);
}
/**
@@ -439,14 +505,14 @@ function newVectorType <T extends Float> (dim: number, innerType: T): FixedSizeL
*
* `schema` is required if data is empty
*/
export async function fromRecordsToBuffer<T> (
export async function fromRecordsToBuffer<T>(
data: Array<Record<string, unknown>>,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
schema?: Schema,
): Promise<Buffer> {
const table = await convertToTable(data, embeddings, { schema })
const writer = RecordBatchFileWriter.writeAll(table)
return Buffer.from(await writer.toUint8Array())
const table = await convertToTable(data, embeddings, { schema });
const writer = RecordBatchFileWriter.writeAll(table);
return Buffer.from(await writer.toUint8Array());
}
/**
@@ -456,14 +522,14 @@ export async function fromRecordsToBuffer<T> (
*
* `schema` is required if data is empty
*/
export async function fromRecordsToStreamBuffer<T> (
export async function fromRecordsToStreamBuffer<T>(
data: Array<Record<string, unknown>>,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
schema?: Schema,
): Promise<Buffer> {
const table = await convertToTable(data, embeddings, { schema })
const writer = RecordBatchStreamWriter.writeAll(table)
return Buffer.from(await writer.toUint8Array())
const table = await convertToTable(data, embeddings, { schema });
const writer = RecordBatchStreamWriter.writeAll(table);
return Buffer.from(await writer.toUint8Array());
}
/**
@@ -474,23 +540,23 @@ export async function fromRecordsToStreamBuffer<T> (
*
* `schema` is required if the table is empty
*/
export async function fromTableToBuffer<T> (
export async function fromTableToBuffer<T>(
table: ArrowTable,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
schema?: Schema,
): Promise<Buffer> {
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema)
const writer = RecordBatchFileWriter.writeAll(tableWithEmbeddings)
return Buffer.from(await writer.toUint8Array())
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
const writer = RecordBatchFileWriter.writeAll(tableWithEmbeddings);
return Buffer.from(await writer.toUint8Array());
}
export async function fromDataToBuffer<T> (
export async function fromDataToBuffer<T>(
data: Data,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
schema?: Schema,
): Promise<Buffer> {
if (data instanceof ArrowTable) {
return fromTableToBuffer(data, embeddings, schema)
return fromTableToBuffer(data, embeddings, schema);
} else {
const table = await convertToTable(data);
return fromTableToBuffer(table, embeddings, schema);
@@ -505,46 +571,46 @@ export async function fromDataToBuffer<T> (
*
* `schema` is required if the table is empty
*/
export async function fromTableToStreamBuffer<T> (
export async function fromTableToStreamBuffer<T>(
table: ArrowTable,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
schema?: Schema,
): Promise<Buffer> {
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema)
const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings)
return Buffer.from(await writer.toUint8Array())
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
return Buffer.from(await writer.toUint8Array());
}
function alignBatch (batch: RecordBatch, schema: Schema): RecordBatch {
const alignedChildren = []
function alignBatch(batch: RecordBatch, schema: Schema): RecordBatch {
const alignedChildren = [];
for (const field of schema.fields) {
const indexInBatch = batch.schema.fields?.findIndex(
(f) => f.name === field.name
)
(f) => f.name === field.name,
);
if (indexInBatch < 0) {
throw new Error(
`The column ${field.name} was not found in the Arrow Table`
)
`The column ${field.name} was not found in the Arrow Table`,
);
}
alignedChildren.push(batch.data.children[indexInBatch])
alignedChildren.push(batch.data.children[indexInBatch]);
}
const newData = makeData({
type: new Struct(schema.fields),
length: batch.numRows,
nullCount: batch.nullCount,
children: alignedChildren
})
return new RecordBatch(schema, newData)
children: alignedChildren,
});
return new RecordBatch(schema, newData);
}
function alignTable (table: ArrowTable, schema: Schema): ArrowTable {
function alignTable(table: ArrowTable, schema: Schema): ArrowTable {
const alignedBatches = table.batches.map((batch) =>
alignBatch(batch, schema)
)
return new ArrowTable(schema, alignedBatches)
alignBatch(batch, schema),
);
return new ArrowTable(schema, alignedBatches);
}
// Creates an empty Arrow Table
export function createEmptyTable (schema: Schema): ArrowTable {
return new ArrowTable(schema)
export function createEmptyTable(schema: Schema): ArrowTable {
return new ArrowTable(schema);
}

View File

@@ -105,7 +105,7 @@ export class Connection {
async createTable(
name: string,
data: Record<string, unknown>[] | ArrowTable,
options?: Partial<CreateTableOptions>
options?: Partial<CreateTableOptions>,
): Promise<Table> {
let mode: string = options?.mode ?? "create";
const existOk = options?.existOk ?? false;
@@ -134,7 +134,7 @@ export class Connection {
async createEmptyTable(
name: string,
schema: Schema,
options?: Partial<CreateTableOptions>
options?: Partial<CreateTableOptions>,
): Promise<Table> {
let mode: string = options?.mode ?? "create";
const existOk = options?.existOk ?? false;

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
import { type Float } from 'apache-arrow'
import { type Float } from "apache-arrow";
/**
* An embedding function that automatically creates vector representation for a given column.
@@ -21,7 +21,7 @@ export interface EmbeddingFunction<T> {
/**
* The name of the column that will be used as input for the Embedding Function.
*/
sourceColumn: string
sourceColumn: string;
/**
* The data type of the embedding
@@ -30,7 +30,7 @@ export interface EmbeddingFunction<T> {
* an Arrow float array. By default this will be Float32 but this property can
* be used to control the conversion.
*/
embeddingDataType?: Float
embeddingDataType?: Float;
/**
* The dimension of the embedding
@@ -39,14 +39,14 @@ export interface EmbeddingFunction<T> {
* `embed`. If this is not specified, and there is an attempt to apply the embedding
* to an empty table, then that process will fail.
*/
embeddingDimension?: number
embeddingDimension?: number;
/**
* The name of the column that will contain the embedding
*
* By default this is "vector"
*/
destColumn?: string
destColumn?: string;
/**
* Should the source column be excluded from the resulting table
@@ -54,15 +54,24 @@ export interface EmbeddingFunction<T> {
* By default the source column is included. Set this to true and
* only the embedding will be stored.
*/
excludeSource?: boolean
excludeSource?: boolean;
/**
* Creates a vector representation for the given values.
*/
embed: (data: T[]) => Promise<number[][]>
embed: (data: T[]) => Promise<number[][]>;
}
export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
return typeof value.sourceColumn === 'string' &&
typeof value.embed === 'function'
export function isEmbeddingFunction<T>(
value: unknown,
): value is EmbeddingFunction<T> {
if (typeof value !== "object" || value === null) {
return false;
}
if (!("sourceColumn" in value) || !("embed" in value)) {
return false;
}
return (
typeof value.sourceColumn === "string" && typeof value.embed === "function"
);
}

View File

@@ -12,46 +12,50 @@
// See the License for the specific language governing permissions and
// limitations under the License.
import { type EmbeddingFunction } from './embedding_function'
import type OpenAI from 'openai'
import { type EmbeddingFunction } from "./embedding_function";
import type OpenAI from "openai";
export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
private readonly _openai: OpenAI
private readonly _modelName: string
private readonly _openai: OpenAI;
private readonly _modelName: string;
constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
constructor(
sourceColumn: string,
openAIKey: string,
modelName: string = "text-embedding-ada-002",
) {
/**
* @type {import("openai").default}
*/
let Openai
let Openai;
try {
// eslint-disable-next-line @typescript-eslint/no-var-requires
Openai = require('openai')
Openai = require("openai");
} catch {
throw new Error('please install openai@^4.24.1 using npm install openai')
throw new Error("please install openai@^4.24.1 using npm install openai");
}
this.sourceColumn = sourceColumn
this.sourceColumn = sourceColumn;
const configuration = {
apiKey: openAIKey
}
apiKey: openAIKey,
};
this._openai = new Openai(configuration)
this._modelName = modelName
this._openai = new Openai(configuration);
this._modelName = modelName;
}
async embed (data: string[]): Promise<number[][]> {
async embed(data: string[]): Promise<number[][]> {
const response = await this._openai.embeddings.create({
model: this._modelName,
input: data
})
input: data,
});
const embeddings: number[][] = []
const embeddings: number[][] = [];
for (let i = 0; i < response.data.length; i++) {
embeddings.push(response.data[i].embedding)
embeddings.push(response.data[i].embedding);
}
return embeddings
return embeddings;
}
sourceColumn: string
sourceColumn: string;
}

View File

@@ -13,7 +13,10 @@
// limitations under the License.
import { Connection } from "./connection";
import { Connection as LanceDbConnection, ConnectionOptions } from "./native.js";
import {
Connection as LanceDbConnection,
ConnectionOptions,
} from "./native.js";
export {
ConnectionOptions,
@@ -38,8 +41,10 @@ export { IvfPQOptions, IndexBuilder } from "./indexer";
*
* @see {@link ConnectionOptions} for more details on the URI format.
*/
export async function connect(uri: string, opts?: Partial<ConnectionOptions>): Promise<Connection>
{
export async function connect(
uri: string,
opts?: Partial<ConnectionOptions>,
): Promise<Connection> {
opts = opts ?? {};
const nativeConn = await LanceDbConnection.new(uri, opts);
return new Connection(nativeConn);

View File

@@ -66,7 +66,7 @@ export class IndexBuilder {
options?.num_sub_vectors,
options?.num_bits,
options?.max_iterations,
options?.sample_rate
options?.sample_rate,
);
return this;
}

View File

@@ -25,14 +25,15 @@ class RecordBatchIterator implements AsyncIterator<RecordBatch> {
constructor(
inner?: NativeBatchIterator,
promise?: Promise<NativeBatchIterator>
promise?: Promise<NativeBatchIterator>,
) {
// TODO: check promise reliably so we don't need to pass two arguments.
this.inner = inner;
this.promised_inner = promise;
}
async next(): Promise<IteratorResult<RecordBatch<any>, any>> {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async next(): Promise<IteratorResult<RecordBatch<any>>> {
if (this.inner === undefined) {
this.inner = await this.promised_inner;
}
@@ -139,12 +140,13 @@ export class Query implements AsyncIterable<RecordBatch> {
/** Returns a JSON Array of All results.
*
*/
async toArray(): Promise<any[]> {
async toArray(): Promise<unknown[]> {
const tbl = await this.toArrow();
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
return tbl.toArray();
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
[Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>> {
const promise = this.inner.executeStream();
return new RecordBatchIterator(undefined, promise);

View File

@@ -13,7 +13,11 @@
// limitations under the License.
import { Schema, tableFromIPC } from "apache-arrow";
import { AddColumnsSql, ColumnAlteration, Table as _NativeTable } from "./native";
import {
AddColumnsSql,
ColumnAlteration,
Table as _NativeTable,
} from "./native";
import { Query } from "./query";
import { IndexBuilder } from "./indexer";
import { Data, fromDataToBuffer } from "./arrow";
@@ -49,7 +53,6 @@ export class Table {
this.inner = inner;
}
/** Return true if the table has not been closed */
isOpen(): boolean {
return this.inner.isOpen();
@@ -84,7 +87,7 @@ export class Table {
* @return The number of rows added to the table
*/
async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
let mode = options?.mode ?? "append";
const mode = options?.mode ?? "append";
const buffer = await fromDataToBuffer(data);
await this.inner.add(buffer, mode);

446
nodejs/package-lock.json generated
View File

@@ -23,13 +23,16 @@
"@types/tmp": "^0.2.6",
"@typescript-eslint/eslint-plugin": "^6.19.0",
"@typescript-eslint/parser": "^6.19.0",
"eslint": "^8.56.0",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"jest": "^29.7.0",
"prettier": "^3.1.0",
"tmp": "^0.2.3",
"ts-jest": "^29.1.2",
"typedoc": "^0.25.7",
"typedoc-plugin-markdown": "^3.17.1",
"typescript": "^5.3.3"
"typescript": "^5.3.3",
"typescript-eslint": "^7.1.0"
},
"engines": {
"node": ">= 18"
@@ -819,9 +822,9 @@
}
},
"node_modules/@eslint/js": {
"version": "8.56.0",
"resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.56.0.tgz",
"integrity": "sha512-gMsVel9D7f2HLkBma9VbtzZRehRogVRfbr++f06nL2vnCGCNlzOD+/MUov/F4p8myyAHspEhVobgjpX64q5m6A==",
"version": "8.57.0",
"resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.57.0.tgz",
"integrity": "sha512-Ys+3g2TaW7gADOJzPt83SJtCDhMjndcDMFVQ/Tj9iA1BfJzFKD9mAUXT3OenpuPHbI6P/myECxRJrofUsDx/5g==",
"dev": true,
"engines": {
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
@@ -954,15 +957,6 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/@jest/console/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/@jest/core": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/@jest/core/-/core-29.7.0.tgz",
@@ -1025,15 +1019,6 @@
"node": ">=8"
}
},
"node_modules/@jest/core/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/@jest/environment": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/@jest/environment/-/environment-29.7.0.tgz",
@@ -1149,15 +1134,6 @@
}
}
},
"node_modules/@jest/reporters/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/@jest/schemas": {
"version": "29.6.3",
"resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz",
@@ -1223,15 +1199,6 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/@jest/test-sequencer/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/@jest/transform": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/@jest/transform/-/transform-29.7.0.tgz",
@@ -1258,15 +1225,6 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/@jest/transform/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/@jest/transform/node_modules/write-file-atomic": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-4.0.2.tgz",
@@ -1739,26 +1697,6 @@
"balanced-match": "^1.0.0"
}
},
"node_modules/@typescript-eslint/typescript-estree/node_modules/globby": {
"version": "11.1.0",
"resolved": "https://registry.npmjs.org/globby/-/globby-11.1.0.tgz",
"integrity": "sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==",
"dev": true,
"dependencies": {
"array-union": "^2.1.0",
"dir-glob": "^3.0.1",
"fast-glob": "^3.2.9",
"ignore": "^5.2.0",
"merge2": "^1.4.1",
"slash": "^3.0.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": {
"version": "9.0.3",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.3.tgz",
@@ -1774,15 +1712,6 @@
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/@typescript-eslint/typescript-estree/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/@typescript-eslint/utils": {
"version": "6.19.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-6.19.0.tgz",
@@ -2035,15 +1964,6 @@
"@babel/core": "^7.8.0"
}
},
"node_modules/babel-jest/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/babel-plugin-istanbul": {
"version": "6.1.1",
"resolved": "https://registry.npmjs.org/babel-plugin-istanbul/-/babel-plugin-istanbul-6.1.1.tgz",
@@ -2642,16 +2562,16 @@
}
},
"node_modules/eslint": {
"version": "8.56.0",
"resolved": "https://registry.npmjs.org/eslint/-/eslint-8.56.0.tgz",
"integrity": "sha512-Go19xM6T9puCOWntie1/P997aXxFsOi37JIHRWI514Hc6ZnaHGKY9xFhrU65RT6CcBEzZoGG1e6Nq+DT04ZtZQ==",
"version": "8.57.0",
"resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.0.tgz",
"integrity": "sha512-dZ6+mexnaTIbSBZWgou51U6OmzIhYM2VcNdtiTtI7qPNZm35Akpr0f6vtw3w1Kmn5PYo+tZVfh13WrhpS6oLqQ==",
"dev": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.2.0",
"@eslint-community/regexpp": "^4.6.1",
"@eslint/eslintrc": "^2.1.4",
"@eslint/js": "8.56.0",
"@humanwhocodes/config-array": "^0.11.13",
"@eslint/js": "8.57.0",
"@humanwhocodes/config-array": "^0.11.14",
"@humanwhocodes/module-importer": "^1.0.1",
"@nodelib/fs.walk": "^1.2.8",
"@ungap/structured-clone": "^1.2.0",
@@ -2696,6 +2616,18 @@
"url": "https://opencollective.com/eslint"
}
},
"node_modules/eslint-config-prettier": {
"version": "9.1.0",
"resolved": "https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-9.1.0.tgz",
"integrity": "sha512-NSWl5BFQWEPi1j4TjVNItzYV7dZXZ+wP6I6ZhrBGpChQhZRUaElihE9uRRkcbRnNb76UMKDF3r+WTmNcGPKsqw==",
"dev": true,
"bin": {
"eslint-config-prettier": "bin/cli.js"
},
"peerDependencies": {
"eslint": ">=7.0.0"
}
},
"node_modules/eslint-scope": {
"version": "7.2.2",
"resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.2.2.tgz",
@@ -3159,6 +3091,26 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/globby": {
"version": "11.1.0",
"resolved": "https://registry.npmjs.org/globby/-/globby-11.1.0.tgz",
"integrity": "sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==",
"dev": true,
"dependencies": {
"array-union": "^2.1.0",
"dir-glob": "^3.0.1",
"fast-glob": "^3.2.9",
"ignore": "^5.2.0",
"merge2": "^1.4.1",
"slash": "^3.0.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/graceful-fs": {
"version": "4.2.11",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
@@ -3633,15 +3585,6 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/jest-circus/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/jest-cli": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/jest-cli/-/jest-cli-29.7.0.tgz",
@@ -3735,15 +3678,6 @@
"node": ">=8"
}
},
"node_modules/jest-config/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/jest-diff": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-29.7.0.tgz",
@@ -3886,15 +3820,6 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/jest-message-util/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/jest-mock": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/jest-mock/-/jest-mock-29.7.0.tgz",
@@ -3968,15 +3893,6 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/jest-resolve/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/jest-runner": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/jest-runner/-/jest-runner-29.7.0.tgz",
@@ -4054,15 +3970,6 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
"node_modules/jest-runtime/node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/jest-snapshot": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/jest-snapshot/-/jest-snapshot-29.7.0.tgz",
@@ -4853,6 +4760,21 @@
"node": ">= 0.8.0"
}
},
"node_modules/prettier": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/prettier/-/prettier-3.1.0.tgz",
"integrity": "sha512-TQLvXjq5IAibjh8EpBIkNKxO749UEWABoiIZehEPiY4GNpVdhaFKqSTu+QrlU6D2dPAfubRmtJTi4K4YkQ5eXw==",
"dev": true,
"bin": {
"prettier": "bin/prettier.cjs"
},
"engines": {
"node": ">=14"
},
"funding": {
"url": "https://github.com/prettier/prettier?sponsor=1"
}
},
"node_modules/pretty-format": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-29.7.0.tgz",
@@ -5107,6 +5029,15 @@
"integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==",
"dev": true
},
"node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
"integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==",
"dev": true,
"engines": {
"node": ">=8"
}
},
"node_modules/source-map": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
@@ -5498,6 +5429,245 @@
"node": ">=14.17"
}
},
"node_modules/typescript-eslint": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-7.1.0.tgz",
"integrity": "sha512-GfAALH4zoqae5mIfHr7WU3BsULHP73hjwF8vCmyTkH3IXHXjqg3JNWwUcd8CwOTLIr4tjRTZQWpToyESPnpOhg==",
"dev": true,
"dependencies": {
"@typescript-eslint/eslint-plugin": "7.1.0",
"@typescript-eslint/parser": "7.1.0"
},
"engines": {
"node": "^16.0.0 || >=18.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/typescript-eslint"
},
"peerDependencies": {
"eslint": "^8.56.0"
},
"peerDependenciesMeta": {
"typescript": {
"optional": true
}
}
},
"node_modules/typescript-eslint/node_modules/@typescript-eslint/eslint-plugin": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.1.0.tgz",
"integrity": "sha512-j6vT/kCulhG5wBmGtstKeiVr1rdXE4nk+DT1k6trYkwlrvW9eOF5ZbgKnd/YR6PcM4uTEXa0h6Fcvf6X7Dxl0w==",
"dev": true,
"dependencies": {
"@eslint-community/regexpp": "^4.5.1",
"@typescript-eslint/scope-manager": "7.1.0",
"@typescript-eslint/type-utils": "7.1.0",
"@typescript-eslint/utils": "7.1.0",
"@typescript-eslint/visitor-keys": "7.1.0",
"debug": "^4.3.4",
"graphemer": "^1.4.0",
"ignore": "^5.2.4",
"natural-compare": "^1.4.0",
"semver": "^7.5.4",
"ts-api-utils": "^1.0.1"
},
"engines": {
"node": "^16.0.0 || >=18.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/typescript-eslint"
},
"peerDependencies": {
"@typescript-eslint/parser": "^7.0.0",
"eslint": "^8.56.0"
},
"peerDependenciesMeta": {
"typescript": {
"optional": true
}
}
},
"node_modules/typescript-eslint/node_modules/@typescript-eslint/parser": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-7.1.0.tgz",
"integrity": "sha512-V1EknKUubZ1gWFjiOZhDSNToOjs63/9O0puCgGS8aDOgpZY326fzFu15QAUjwaXzRZjf/qdsdBrckYdv9YxB8w==",
"dev": true,
"dependencies": {
"@typescript-eslint/scope-manager": "7.1.0",
"@typescript-eslint/types": "7.1.0",
"@typescript-eslint/typescript-estree": "7.1.0",
"@typescript-eslint/visitor-keys": "7.1.0",
"debug": "^4.3.4"
},
"engines": {
"node": "^16.0.0 || >=18.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/typescript-eslint"
},
"peerDependencies": {
"eslint": "^8.56.0"
},
"peerDependenciesMeta": {
"typescript": {
"optional": true
}
}
},
"node_modules/typescript-eslint/node_modules/@typescript-eslint/scope-manager": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-7.1.0.tgz",
"integrity": "sha512-6TmN4OJiohHfoOdGZ3huuLhpiUgOGTpgXNUPJgeZOZR3DnIpdSgtt83RS35OYNNXxM4TScVlpVKC9jyQSETR1A==",
"dev": true,
"dependencies": {
"@typescript-eslint/types": "7.1.0",
"@typescript-eslint/visitor-keys": "7.1.0"
},
"engines": {
"node": "^16.0.0 || >=18.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/typescript-eslint"
}
},
"node_modules/typescript-eslint/node_modules/@typescript-eslint/type-utils": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-7.1.0.tgz",
"integrity": "sha512-UZIhv8G+5b5skkcuhgvxYWHjk7FW7/JP5lPASMEUoliAPwIH/rxoUSQPia2cuOj9AmDZmwUl1usKm85t5VUMew==",
"dev": true,
"dependencies": {
"@typescript-eslint/typescript-estree": "7.1.0",
"@typescript-eslint/utils": "7.1.0",
"debug": "^4.3.4",
"ts-api-utils": "^1.0.1"
},
"engines": {
"node": "^16.0.0 || >=18.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/typescript-eslint"
},
"peerDependencies": {
"eslint": "^8.56.0"
},
"peerDependenciesMeta": {
"typescript": {
"optional": true
}
}
},
"node_modules/typescript-eslint/node_modules/@typescript-eslint/types": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-7.1.0.tgz",
"integrity": "sha512-qTWjWieJ1tRJkxgZYXx6WUYtWlBc48YRxgY2JN1aGeVpkhmnopq+SUC8UEVGNXIvWH7XyuTjwALfG6bFEgCkQA==",
"dev": true,
"engines": {
"node": "^16.0.0 || >=18.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/typescript-eslint"
}
},
"node_modules/typescript-eslint/node_modules/@typescript-eslint/typescript-estree": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-7.1.0.tgz",
"integrity": "sha512-k7MyrbD6E463CBbSpcOnwa8oXRdHzH1WiVzOipK3L5KSML92ZKgUBrTlehdi7PEIMT8k0bQixHUGXggPAlKnOQ==",
"dev": true,
"dependencies": {
"@typescript-eslint/types": "7.1.0",
"@typescript-eslint/visitor-keys": "7.1.0",
"debug": "^4.3.4",
"globby": "^11.1.0",
"is-glob": "^4.0.3",
"minimatch": "9.0.3",
"semver": "^7.5.4",
"ts-api-utils": "^1.0.1"
},
"engines": {
"node": "^16.0.0 || >=18.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/typescript-eslint"
},
"peerDependenciesMeta": {
"typescript": {
"optional": true
}
}
},
"node_modules/typescript-eslint/node_modules/@typescript-eslint/utils": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-7.1.0.tgz",
"integrity": "sha512-WUFba6PZC5OCGEmbweGpnNJytJiLG7ZvDBJJoUcX4qZYf1mGZ97mO2Mps6O2efxJcJdRNpqweCistDbZMwIVHw==",
"dev": true,
"dependencies": {
"@eslint-community/eslint-utils": "^4.4.0",
"@types/json-schema": "^7.0.12",
"@types/semver": "^7.5.0",
"@typescript-eslint/scope-manager": "7.1.0",
"@typescript-eslint/types": "7.1.0",
"@typescript-eslint/typescript-estree": "7.1.0",
"semver": "^7.5.4"
},
"engines": {
"node": "^16.0.0 || >=18.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/typescript-eslint"
},
"peerDependencies": {
"eslint": "^8.56.0"
}
},
"node_modules/typescript-eslint/node_modules/@typescript-eslint/visitor-keys": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-7.1.0.tgz",
"integrity": "sha512-FhUqNWluiGNzlvnDZiXad4mZRhtghdoKW6e98GoEOYSu5cND+E39rG5KwJMUzeENwm1ztYBRqof8wMLP+wNPIA==",
"dev": true,
"dependencies": {
"@typescript-eslint/types": "7.1.0",
"eslint-visitor-keys": "^3.4.1"
},
"engines": {
"node": "^16.0.0 || >=18.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/typescript-eslint"
}
},
"node_modules/typescript-eslint/node_modules/brace-expansion": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
"dev": true,
"dependencies": {
"balanced-match": "^1.0.0"
}
},
"node_modules/typescript-eslint/node_modules/minimatch": {
"version": "9.0.3",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.3.tgz",
"integrity": "sha512-RHiac9mvaRw0x3AYRgDC1CxAP7HTcNrrECeA8YYJeWnpo+2Q5CegtZjaotWTWxDG3UeGA1coE05iH1mPjT/2mg==",
"dev": true,
"dependencies": {
"brace-expansion": "^2.0.1"
},
"engines": {
"node": ">=16 || 14 >=14.17"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/typical": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/typical/-/typical-4.0.0.tgz",

View File

@@ -22,13 +22,16 @@
"@types/tmp": "^0.2.6",
"@typescript-eslint/eslint-plugin": "^6.19.0",
"@typescript-eslint/parser": "^6.19.0",
"eslint": "^8.56.0",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"jest": "^29.7.0",
"prettier": "^3.1.0",
"tmp": "^0.2.3",
"ts-jest": "^29.1.2",
"typedoc": "^0.25.7",
"typedoc-plugin-markdown": "^3.17.1",
"typescript": "^5.3.3"
"typescript": "^5.3.3",
"typescript-eslint": "^7.1.0"
},
"ava": {
"timeout": "3m"
@@ -50,8 +53,9 @@
"build:native": "napi build --platform --release --js lancedb/native.js --dts lancedb/native.d.ts dist/",
"build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
"build": "npm run build:debug && tsc -b",
"chkformat": "prettier . --check",
"docs": "typedoc --plugin typedoc-plugin-markdown lancedb/index.ts",
"lint": "eslint lancedb --ext .js,.ts",
"lint": "eslint lancedb",
"prepublishOnly": "napi prepublish -t npm",
"test": "npm run build && jest --verbose",
"universal": "napi universal",

View File

@@ -1,9 +1,5 @@
{
"include": [
"lancedb/*.ts",
"lancedb/**/*.ts",
"lancedb/*.js",
],
"include": ["lancedb/*.ts", "lancedb/**/*.ts", "lancedb/*.js"],
"compilerOptions": {
"target": "es2022",
"module": "commonjs",
@@ -11,21 +7,17 @@
"outDir": "./dist",
"strict": true,
"allowJs": true,
"resolveJsonModule": true,
"resolveJsonModule": true
},
"exclude": [
"./dist/*",
],
"exclude": ["./dist/*"],
"typedocOptions": {
"entryPoints": [
"lancedb/index.ts"
],
"entryPoints": ["lancedb/index.ts"],
"out": "../docs/src/javascript/",
"visibilityFilters": {
"protected": false,
"private": false,
"inherited": true,
"external": false,
"external": false
}
}
}