ci(node): run examples in CI (#1796)

This is done as setup for a PR that will fix the OpenAI dependency
issue.

 * [x] FTS examples
 * [x] Setup mock openai
 * [x] Ran `npm audit fix`
 * [x] sentences embeddings test
 * [x] Double check formatting of docs examples
This commit is contained in:
Will Jones
2024-11-13 11:10:56 -08:00
committed by GitHub
parent 9f228feb0e
commit 0fd8a50bd7
39 changed files with 6141 additions and 1705 deletions

View File

@@ -53,6 +53,9 @@ jobs:
cargo clippy --all --all-features -- -D warnings
npm ci
npm run lint-ci
- name: Lint examples
working-directory: nodejs/examples
run: npm ci && npm run lint-ci
linux:
name: Linux (NodeJS ${{ matrix.node-version }})
timeout-minutes: 30
@@ -91,6 +94,19 @@ jobs:
env:
S3_TEST: "1"
run: npm run test
- name: Setup examples
working-directory: nodejs/examples
run: npm ci
- name: Test examples
  working-directory: ./
  env:
    OPENAI_API_KEY: test
    OPENAI_BASE_URL: http://0.0.0.0:8000
  run: |
    # Start the mock OpenAI embeddings endpoint in the background.
    python ci/mock_openai.py &
    # The server needs a moment to bind its socket. Poll for the listener
    # (up to ~5s) instead of checking once, so the step does not flake when
    # `ss` happens to run before Python has started listening.
    for _ in $(seq 1 50); do
      if ss -ltn | grep -q ':8000'; then break; fi
      sleep 0.1
    done
    # Fail the step (and print the listener) if the server never came up.
    ss -ltnp | grep :8000
    cd nodejs/examples
    npm test
macos:
timeout-minutes: 30
runs-on: "macos-14"

57
ci/mock_openai.py Normal file
View File

@@ -0,0 +1,57 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
"""A zero-dependency mock OpenAI embeddings API endpoint for testing purposes."""
import argparse
import json
import http.server
class MockOpenAIRequestHandler(http.server.BaseHTTPRequestHandler):
    """Answer every POST with a canned OpenAI-style embeddings response.

    The request path is not inspected. The body must be a JSON object whose
    ``input`` field is either a single string or a list; one constant
    embedding is returned per input. Malformed requests receive a 400 JSON
    error instead of raising inside the handler (which would leave the client
    without any HTTP response).
    """

    def _send_json(self, status, payload):
        """Serialize ``payload`` as JSON and send it with HTTP status ``status``."""
        body = json.dumps(payload).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-type", "application/json")
        # Explicit length lets clients read the body without waiting for EOF.
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_bad_request(self, message):
        """Send a 400 response carrying ``message`` in an OpenAI-style error object."""
        self._send_json(
            400,
            {"error": {"message": message, "type": "invalid_request_error"}},
        )

    def do_POST(self):
        """Handle a POST by returning one fake embedding per requested input."""
        try:
            # A missing Content-Length is treated as an empty body, which then
            # fails JSON parsing and is reported as a 400 below.
            content_length = int(self.headers.get("Content-Length") or 0)
            post_data = json.loads(self.rfile.read(content_length).decode("utf-8"))
        except ValueError as exc:
            # Covers a non-numeric Content-Length, invalid UTF-8 and invalid JSON.
            self._send_bad_request(f"invalid request body: {exc}")
            return

        # See: https://platform.openai.com/docs/api-reference/embeddings/create
        inputs = post_data.get("input") if isinstance(post_data, dict) else None
        if isinstance(inputs, str):
            num_inputs = 1
        elif isinstance(inputs, list):
            num_inputs = len(inputs)
        else:
            self._send_bad_request("'input' must be a string or a list")
            return

        model = post_data.get("model", "text-embedding-ada-002")

        # Every embedding is the same constant vector; 1536 is the
        # dimensionality of text-embedding-ada-002, the default model above.
        data = [
            {
                "object": "embedding",
                "embedding": [0.1] * 1536,
                "index": i,
            }
            for i in range(num_inputs)
        ]

        response = {
            "object": "list",
            "data": data,
            "model": model,
            "usage": {
                "prompt_tokens": 0,
                "total_tokens": 0,
            },
        }
        self._send_json(200, response)
if __name__ == "__main__":
    # Command-line entry point: serve the mock endpoint until interrupted.
    parser = argparse.ArgumentParser(description="Mock OpenAI embeddings API endpoint")
    parser.add_argument("--port", type=int, default=8000, help="Port to listen on")
    args = parser.parse_args()
    port = args.port

    with http.server.HTTPServer(("0.0.0.0", port), MockOpenAIRequestHandler) as server:
        # Announce only once the socket is actually bound, and flush so the
        # message is visible when stdout is a pipe (e.g. backgrounded in CI),
        # where Python would otherwise buffer it.
        print(f"server started on port {port}. Press Ctrl-C to stop.", flush=True)
        print(
            f"To use, set OPENAI_BASE_URL=http://localhost:{port} in your environment.",
            flush=True,
        )
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the documented way to stop; exit without a traceback.
            pass

View File

@@ -45,9 +45,9 @@ Lance supports `IVF_PQ` index type by default.
Creating indexes is done via the [lancedb.Table.createIndex](../js/classes/Table.md/#createIndex) method.
```typescript
--8<--- "nodejs/examples/ann_indexes.ts:import"
--8<--- "nodejs/examples/ann_indexes.test.ts:import"
--8<-- "nodejs/examples/ann_indexes.ts:ingest"
--8<-- "nodejs/examples/ann_indexes.test.ts:ingest"
```
=== "vectordb (deprecated)"
@@ -171,7 +171,7 @@ There are a couple of parameters that can be used to fine-tune the search:
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/ann_indexes.ts:search1"
--8<-- "nodejs/examples/ann_indexes.test.ts:search1"
```
=== "vectordb (deprecated)"
@@ -205,7 +205,7 @@ You can further filter the elements returned by a search using a where clause.
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/ann_indexes.ts:search2"
--8<-- "nodejs/examples/ann_indexes.test.ts:search2"
```
=== "vectordb (deprecated)"
@@ -237,7 +237,7 @@ You can select the columns returned by the query using a select clause.
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/ann_indexes.ts:search3"
--8<-- "nodejs/examples/ann_indexes.test.ts:search3"
```
=== "vectordb (deprecated)"

View File

@@ -157,7 +157,7 @@ recommend switching to stable releases.
import * as lancedb from "@lancedb/lancedb";
import * as arrow from "apache-arrow";
--8<-- "nodejs/examples/basic.ts:connect"
--8<-- "nodejs/examples/basic.test.ts:connect"
```
=== "vectordb (deprecated)"
@@ -212,7 +212,7 @@ table.
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:create_table"
--8<-- "nodejs/examples/basic.test.ts:create_table"
```
=== "vectordb (deprecated)"
@@ -268,7 +268,7 @@ similar to a `CREATE TABLE` statement in SQL.
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:create_empty_table"
--8<-- "nodejs/examples/basic.test.ts:create_empty_table"
```
=== "vectordb (deprecated)"
@@ -298,7 +298,7 @@ Once created, you can open a table as follows:
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:open_table"
--8<-- "nodejs/examples/basic.test.ts:open_table"
```
=== "vectordb (deprecated)"
@@ -327,7 +327,7 @@ If you forget the name of your table, you can always get a listing of all table
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:table_names"
--8<-- "nodejs/examples/basic.test.ts:table_names"
```
=== "vectordb (deprecated)"
@@ -357,7 +357,7 @@ After a table has been created, you can always add more data to it as follows:
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:add_data"
--8<-- "nodejs/examples/basic.test.ts:add_data"
```
=== "vectordb (deprecated)"
@@ -389,7 +389,7 @@ Once you've embedded the query, you can find its nearest neighbors as follows:
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:vector_search"
--8<-- "nodejs/examples/basic.test.ts:vector_search"
```
=== "vectordb (deprecated)"
@@ -429,7 +429,7 @@ LanceDB allows you to create an ANN index on a table as follows:
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:create_index"
--8<-- "nodejs/examples/basic.test.ts:create_index"
```
=== "vectordb (deprecated)"
@@ -469,7 +469,7 @@ This can delete any number of rows that match the filter.
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:delete_rows"
--8<-- "nodejs/examples/basic.test.ts:delete_rows"
```
=== "vectordb (deprecated)"
@@ -527,7 +527,7 @@ Use the `drop_table()` method on the database to remove a table.
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:drop_table"
--8<-- "nodejs/examples/basic.test.ts:drop_table"
```
=== "vectordb (deprecated)"
@@ -561,8 +561,8 @@ You can use the embedding API when working with embedding models. It automatical
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/embedding.ts:imports"
--8<-- "nodejs/examples/embedding.ts:openai_embeddings"
--8<-- "nodejs/examples/embedding.test.ts:imports"
--8<-- "nodejs/examples/embedding.test.ts:openai_embeddings"
```
=== "Rust"

View File

@@ -47,9 +47,9 @@ Let's implement `SentenceTransformerEmbeddings` class. All you need to do is imp
=== "TypeScript"
```ts
--8<--- "nodejs/examples/custom_embedding_function.ts:imports"
--8<--- "nodejs/examples/custom_embedding_function.test.ts:imports"
--8<--- "nodejs/examples/custom_embedding_function.ts:embedding_impl"
--8<--- "nodejs/examples/custom_embedding_function.test.ts:embedding_impl"
```
@@ -78,7 +78,7 @@ Now you can use this embedding function to create your table schema and that's i
=== "TypeScript"
```ts
--8<--- "nodejs/examples/custom_embedding_function.ts:call_custom_function"
--8<--- "nodejs/examples/custom_embedding_function.test.ts:call_custom_function"
```
!!! note

View File

@@ -94,8 +94,8 @@ the embeddings at all:
=== "@lancedb/lancedb"
```ts
--8<-- "nodejs/examples/embedding.ts:imports"
--8<-- "nodejs/examples/embedding.ts:embedding_function"
--8<-- "nodejs/examples/embedding.test.ts:imports"
--8<-- "nodejs/examples/embedding.test.ts:embedding_function"
```
=== "vectordb (deprecated)"
@@ -150,7 +150,7 @@ need to worry about it when you query the table:
.toArray()
```
=== "vectordb (deprecated)
=== "vectordb (deprecated)"
```ts
const results = await table

View File

@@ -51,8 +51,8 @@ LanceDB registers the OpenAI embeddings function in the registry as `openai`. Yo
=== "TypeScript"
```typescript
--8<--- "nodejs/examples/embedding.ts:imports"
--8<--- "nodejs/examples/embedding.ts:openai_embeddings"
--8<--- "nodejs/examples/embedding.test.ts:imports"
--8<--- "nodejs/examples/embedding.test.ts:openai_embeddings"
```
=== "Rust"
@@ -121,12 +121,10 @@ class Words(LanceModel):
vector: Vector(func.ndims()) = func.VectorField()
table = db.create_table("words", schema=Words)
table.add(
[
{"text": "hello world"},
{"text": "goodbye world"}
]
)
table.add([
{"text": "hello world"},
{"text": "goodbye world"}
])
query = "greetings"
actual = table.search(query).limit(1).to_pydantic(Words)[0]

View File

@@ -85,13 +85,13 @@ Initialize a LanceDB connection and create a table
```ts
--8<-- "nodejs/examples/basic.ts:create_table"
--8<-- "nodejs/examples/basic.test.ts:create_table"
```
This will infer the schema from the provided data. If you want to explicitly provide a schema, you can use `apache-arrow` to declare a schema.
```ts
--8<-- "nodejs/examples/basic.ts:create_table_with_schema"
--8<-- "nodejs/examples/basic.test.ts:create_table_with_schema"
```
!!! info "Note"
@@ -100,14 +100,14 @@ Initialize a LanceDB connection and create a table
passed in will NOT be appended to the table in that case.
```ts
--8<-- "nodejs/examples/basic.ts:create_table_exists_ok"
--8<-- "nodejs/examples/basic.test.ts:create_table_exists_ok"
```
Sometimes you want to make sure that you start fresh. If you want to
overwrite the table, you can pass in mode: "overwrite" to the createTable function.
```ts
--8<-- "nodejs/examples/basic.ts:create_table_overwrite"
--8<-- "nodejs/examples/basic.test.ts:create_table_overwrite"
```
=== "vectordb (deprecated)"
@@ -227,7 +227,7 @@ LanceDB supports float16 data type!
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:create_f16_table"
--8<-- "nodejs/examples/basic.test.ts:create_f16_table"
```
=== "vectordb (deprecated)"
@@ -455,7 +455,7 @@ You can create an empty table for scenarios where you want to add data to the ta
=== "@lancedb/lancedb"
```typescript
--8<-- "nodejs/examples/basic.ts:create_empty_table"
--8<-- "nodejs/examples/basic.test.ts:create_empty_table"
```
=== "vectordb (deprecated)"

View File

@@ -58,9 +58,9 @@ db.create_table("my_vectors", data=data)
=== "@lancedb/lancedb"
```ts
--8<-- "nodejs/examples/search.ts:import"
--8<-- "nodejs/examples/search.test.ts:import"
--8<-- "nodejs/examples/search.ts:search1"
--8<-- "nodejs/examples/search.test.ts:search1"
```
@@ -89,7 +89,7 @@ By default, `l2` will be used as metric type. You can specify the metric type as
=== "@lancedb/lancedb"
```ts
--8<-- "nodejs/examples/search.ts:search2"
--8<-- "nodejs/examples/search.test.ts:search2"
```
=== "vectordb (deprecated)"

View File

@@ -49,7 +49,7 @@ const tbl = await db.createTable('myVectors', data)
=== "@lancedb/lancedb"
```ts
--8<-- "nodejs/examples/filtering.ts:search"
--8<-- "nodejs/examples/filtering.test.ts:search"
```
=== "vectordb (deprecated)"
@@ -91,7 +91,7 @@ For example, the following filter string is acceptable:
=== "@lancedb/lancedb"
```ts
--8<-- "nodejs/examples/filtering.ts:vec_search"
--8<-- "nodejs/examples/filtering.test.ts:vec_search"
```
=== "vectordb (deprecated)"
@@ -169,7 +169,7 @@ You can also filter your data without search.
=== "@lancedb/lancedb"
```ts
--8<-- "nodejs/examples/filtering.ts:sql_search"
--8<-- "nodejs/examples/filtering.test.ts:sql_search"
```
=== "vectordb (deprecated)"

View File

@@ -9,7 +9,8 @@
"**/native.js",
"**/native.d.ts",
"**/npm/**/*",
"**/.vscode/**"
"**/.vscode/**",
"./examples/*"
]
},
"formatter": {

View File

@@ -0,0 +1,57 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { expect, test } from "@jest/globals";
// --8<-- [start:import]
import * as lancedb from "@lancedb/lancedb";
import { VectorQuery } from "@lancedb/lancedb";
// --8<-- [end:import]
import { withTempDirectory } from "./util.ts";
test("ann index examples", async () => {
  // Runs the ANN index documentation snippets against a temporary database.
  // The `--8<--` markers delimit the regions embedded in the docs, so each
  // marked region is kept self-contained.
  await withTempDirectory(async (databaseDir) => {
    // --8<-- [start:ingest]
    const db = await lancedb.connect(databaseDir);

    const rows = Array.from({ length: 5_000 }, (_, rowIndex) => ({
      vector: Array(128).fill(rowIndex),
      id: `${rowIndex}`,
      content: "",
      longId: `${rowIndex}`,
    }));

    const table = await db.createTable("my_vectors", rows, {
      mode: "overwrite",
    });
    await table.createIndex("vector", {
      config: lancedb.Index.ivfPq({
        numPartitions: 10,
        numSubVectors: 16,
      }),
    });
    // --8<-- [end:ingest]

    // --8<-- [start:search1]
    const results1 = await (
      table.search(Array(128).fill(1.2)).limit(2) as VectorQuery
    )
      .nprobes(20)
      .refineFactor(10)
      .toArray();
    // --8<-- [end:search1]
    expect(results1).toHaveLength(2);

    // --8<-- [start:search2]
    const results2 = await table
      .search(Array(128).fill(1.2))
      .where("id != '1141'")
      .limit(2)
      .toArray();
    // --8<-- [end:search2]
    expect(results2).toHaveLength(2);

    // --8<-- [start:search3]
    const results3 = await table
      .search(Array(128).fill(1.2))
      .select(["id"])
      .limit(2)
      .toArray();
    // --8<-- [end:search3]
    expect(results3).toHaveLength(2);
  });
  // Third argument: Jest timeout for this test, in milliseconds.
}, 100_000);

View File

@@ -1,49 +0,0 @@
// --8<-- [start:import]
import * as lancedb from "@lancedb/lancedb";
// --8<-- [end:import]
// --8<-- [start:ingest]
const db = await lancedb.connect("/tmp/lancedb/");
const data = Array.from({ length: 10_000 }, (_, i) => ({
vector: Array(1536).fill(i),
id: `${i}`,
content: "",
longId: `${i}`,
}));
const table = await db.createTable("my_vectors", data, { mode: "overwrite" });
await table.createIndex("vector", {
config: lancedb.Index.ivfPq({
numPartitions: 16,
numSubVectors: 48,
}),
});
// --8<-- [end:ingest]
// --8<-- [start:search1]
const _results1 = await table
.search(Array(1536).fill(1.2))
.limit(2)
.nprobes(20)
.refineFactor(10)
.toArray();
// --8<-- [end:search1]
// --8<-- [start:search2]
const _results2 = await table
.search(Array(1536).fill(1.2))
.where("id != '1141'")
.limit(2)
.toArray();
// --8<-- [end:search2]
// --8<-- [start:search3]
const _results3 = await table
.search(Array(1536).fill(1.2))
.select(["id"])
.limit(2)
.toArray();
// --8<-- [end:search3]
console.log("Ann indexes: done");

View File

@@ -0,0 +1,175 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { expect, test } from "@jest/globals";
// --8<-- [start:imports]
import * as lancedb from "@lancedb/lancedb";
import * as arrow from "apache-arrow";
import {
Field,
FixedSizeList,
Float16,
Int32,
Schema,
Utf8,
} from "apache-arrow";
// --8<-- [end:imports]
import { withTempDirectory } from "./util.ts";
test("basic table examples", async () => {
  // Walks through the "basic" documentation snippets, in order, against a
  // temporary database. The `--8<--` markers delimit the regions embedded in
  // the docs; assertions are kept outside them so they do not leak into the
  // rendered examples.
  await withTempDirectory(async (databaseDir) => {
    // --8<-- [start:connect]
    const db = await lancedb.connect(databaseDir);
    // --8<-- [end:connect]

    {
      // --8<-- [start:create_table]
      const _tbl = await db.createTable(
        "myTable",
        [
          { vector: [3.1, 4.1], item: "foo", price: 10.0 },
          { vector: [5.9, 26.5], item: "bar", price: 20.0 },
        ],
        { mode: "overwrite" },
      );
      // --8<-- [end:create_table]

      const data = [
        { vector: [3.1, 4.1], item: "foo", price: 10.0 },
        { vector: [5.9, 26.5], item: "bar", price: 20.0 },
      ];

      {
        // --8<-- [start:create_table_exists_ok]
        const tbl = await db.createTable("myTable", data, {
          existOk: true,
        });
        // --8<-- [end:create_table_exists_ok]
        expect(await tbl.countRows()).toBe(2);
      }
      {
        // --8<-- [start:create_table_overwrite]
        const tbl = await db.createTable("myTable", data, {
          mode: "overwrite",
        });
        // --8<-- [end:create_table_overwrite]
        expect(await tbl.countRows()).toBe(2);
      }
    }

    // Drop the table so the explicit-schema example below creates it afresh.
    await db.dropTable("myTable");

    {
      // --8<-- [start:create_table_with_schema]
      const schema = new arrow.Schema([
        new arrow.Field(
          "vector",
          new arrow.FixedSizeList(
            2,
            new arrow.Field("item", new arrow.Float32(), true),
          ),
        ),
        new arrow.Field("item", new arrow.Utf8(), true),
        new arrow.Field("price", new arrow.Float32(), true),
      ]);
      const data = [
        { vector: [3.1, 4.1], item: "foo", price: 10.0 },
        { vector: [5.9, 26.5], item: "bar", price: 20.0 },
      ];
      const tbl = await db.createTable("myTable", data, {
        schema,
      });
      // --8<-- [end:create_table_with_schema]
      expect(await tbl.countRows()).toBe(2);
    }

    {
      // --8<-- [start:create_empty_table]
      const schema = new arrow.Schema([
        new arrow.Field("id", new arrow.Int32()),
        new arrow.Field("name", new arrow.Utf8()),
      ]);
      const emptyTbl = await db.createEmptyTable("empty_table", schema);
      // --8<-- [end:create_empty_table]
      expect(await emptyTbl.countRows()).toBe(0);
    }

    {
      // --8<-- [start:open_table]
      const _tbl = await db.openTable("myTable");
      // --8<-- [end:open_table]
    }

    {
      // --8<-- [start:table_names]
      const tableNames = await db.tableNames();
      // --8<-- [end:table_names]
      expect(tableNames).toEqual(["empty_table", "myTable"]);
    }

    const tbl = await db.openTable("myTable");

    {
      // --8<-- [start:add_data]
      const data = [
        { vector: [1.3, 1.4], item: "fizz", price: 100.0 },
        { vector: [9.5, 56.2], item: "buzz", price: 200.0 },
      ];
      await tbl.add(data);
      // --8<-- [end:add_data]
      // Two rows from the schema example plus the two just added.
      expect(await tbl.countRows()).toBe(4);
    }

    {
      // --8<-- [start:vector_search]
      const res = await tbl.search([100, 100]).limit(2).toArray();
      // --8<-- [end:vector_search]
      expect(res.length).toBe(2);
    }

    {
      // Add generated rows before the index is created below. Array.from's
      // map callback builds each row directly; no placeholder fill is needed.
      const data = Array.from({ length: 1000 }, () => ({
        vector: [Math.random(), Math.random()],
        item: "autogen",
        price: Math.round(Math.random() * 100),
      }));
      await tbl.add(data);
    }

    // --8<-- [start:create_index]
    await tbl.createIndex("vector");
    // --8<-- [end:create_index]

    // --8<-- [start:delete_rows]
    await tbl.delete('item = "fizz"');
    // --8<-- [end:delete_rows]

    // --8<-- [start:drop_table]
    await db.dropTable("myTable");
    // --8<-- [end:drop_table]
    await db.dropTable("empty_table");

    {
      // --8<-- [start:create_f16_table]
      const db = await lancedb.connect(databaseDir);
      const dim = 16;
      const total = 10;
      const f16Schema = new Schema([
        new Field("id", new Int32()),
        new Field(
          "vector",
          new FixedSizeList(dim, new Field("item", new Float16(), true)),
          false,
        ),
      ]);
      const data = lancedb.makeArrowTable(
        Array.from(Array(total), (_, i) => ({
          id: i,
          vector: Array.from(Array(dim), Math.random),
        })),
        { schema: f16Schema },
      );
      const _table = await db.createTable("f16_tbl", data);
      // --8<-- [end:create_f16_table]
      await db.dropTable("f16_tbl");
    }
  });
});

View File

@@ -1,162 +0,0 @@
// --8<-- [start:imports]
import * as lancedb from "@lancedb/lancedb";
import * as arrow from "apache-arrow";
import {
Field,
FixedSizeList,
Float16,
Int32,
Schema,
Utf8,
} from "apache-arrow";
// --8<-- [end:imports]
// --8<-- [start:connect]
const uri = "/tmp/lancedb/";
const db = await lancedb.connect(uri);
// --8<-- [end:connect]
{
  // --8<-- [start:create_table]
  const tbl = await db.createTable(
    "myTable",
    [
      { vector: [3.1, 4.1], item: "foo", price: 10.0 },
      { vector: [5.9, 26.5], item: "bar", price: 20.0 },
    ],
    { mode: "overwrite" },
  );
  // --8<-- [end:create_table]

  const data = [
    { vector: [3.1, 4.1], item: "foo", price: 10.0 },
    { vector: [5.9, 26.5], item: "bar", price: 20.0 },
  ];

  {
    // --8<-- [start:create_table_exists_ok]
    // The option is spelled `existOk`; `existsOk` is not a recognised
    // createTable option.
    const tbl = await db.createTable("myTable", data, {
      existOk: true,
    });
    // --8<-- [end:create_table_exists_ok]
  }
  {
    // --8<-- [start:create_table_overwrite]
    const _tbl = await db.createTable("myTable", data, {
      mode: "overwrite",
    });
    // --8<-- [end:create_table_overwrite]
  }
}
{
// --8<-- [start:create_table_with_schema]
const schema = new arrow.Schema([
new arrow.Field(
"vector",
new arrow.FixedSizeList(
2,
new arrow.Field("item", new arrow.Float32(), true),
),
),
new arrow.Field("item", new arrow.Utf8(), true),
new arrow.Field("price", new arrow.Float32(), true),
]);
const data = [
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
];
const _tbl = await db.createTable("myTable", data, {
schema,
});
// --8<-- [end:create_table_with_schema]
}
{
// --8<-- [start:create_empty_table]
const schema = new arrow.Schema([
new arrow.Field("id", new arrow.Int32()),
new arrow.Field("name", new arrow.Utf8()),
]);
const empty_tbl = await db.createEmptyTable("empty_table", schema);
// --8<-- [end:create_empty_table]
}
{
// --8<-- [start:open_table]
const _tbl = await db.openTable("myTable");
// --8<-- [end:open_table]
}
{
// --8<-- [start:table_names]
const tableNames = await db.tableNames();
console.log(tableNames);
// --8<-- [end:table_names]
}
const tbl = await db.openTable("myTable");
{
// --8<-- [start:add_data]
const data = [
{ vector: [1.3, 1.4], item: "fizz", price: 100.0 },
{ vector: [9.5, 56.2], item: "buzz", price: 200.0 },
];
await tbl.add(data);
// --8<-- [end:add_data]
}
{
  // --8<-- [start:vector_search]
  // toArray() returns a promise; await it so `_res` holds the rows and a
  // failed search is not left as an unhandled rejection.
  const _res = await tbl.search([100, 100]).limit(2).toArray();
  // --8<-- [end:vector_search]
}
{
const data = Array.from({ length: 1000 })
.fill(null)
.map(() => ({
vector: [Math.random(), Math.random()],
item: "autogen",
price: Math.round(Math.random() * 100),
}));
await tbl.add(data);
}
// --8<-- [start:create_index]
await tbl.createIndex("vector");
// --8<-- [end:create_index]
// --8<-- [start:delete_rows]
await tbl.delete('item = "fizz"');
// --8<-- [end:delete_rows]
// --8<-- [start:drop_table]
await db.dropTable("myTable");
// --8<-- [end:drop_table]
await db.dropTable("empty_table");
{
// --8<-- [start:create_f16_table]
const db = await lancedb.connect("/tmp/lancedb");
const dim = 16;
const total = 10;
const f16Schema = new Schema([
new Field("id", new Int32()),
new Field(
"vector",
new FixedSizeList(dim, new Field("item", new Float16(), true)),
false,
),
]);
const data = lancedb.makeArrowTable(
Array.from(Array(total), (_, i) => ({
id: i,
vector: Array.from(Array(dim), Math.random),
})),
{ schema: f16Schema },
);
const _table = await db.createTable("f16_tbl", data);
// --8<-- [end:create_f16_table]
await db.dropTable("f16_tbl");
}

View File

@@ -0,0 +1,76 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { expect, test } from "@jest/globals";

// Everything the documented class below relies on is imported inside the
// `imports` snippet so the rendered example is copy-pasteable. The blank
// lines keep the test-only imports in separate import groups.
// --8<-- [start:imports]
import { FeatureExtractionPipeline, pipeline } from "@huggingface/transformers";
import * as lancedb from "@lancedb/lancedb";
import {
  LanceSchema,
  TextEmbeddingFunction,
  getRegistry,
  register,
} from "@lancedb/lancedb/embedding";
// --8<-- [end:imports]

import { withTempDirectory } from "./util.ts";
// --8<-- [start:embedding_impl]
/**
 * Text embedding function registered under the name "sentence-transformers".
 *
 * Embeddings come from a "feature-extraction" pipeline loaded for the model
 * named by `name`, using mean pooling and normalization.
 */
@register("sentence-transformers")
class SentenceTransformersEmbeddings extends TextEmbeddingFunction {
  name = "Xenova/all-miniLM-L6-v2";
  #ndims!: number;
  extractor!: FeatureExtractionPipeline;

  /** Loads the pipeline and records the embedding dimensionality. */
  async init() {
    this.extractor = await pipeline("feature-extraction", this.name, {
      dtype: "fp32",
    });
    // Embed one probe string to learn the vector length the model produces.
    const probe = await this.generateEmbeddings(["hello"]);
    this.#ndims = probe[0].length;
  }

  /** Number of dimensions per embedding, as measured in `init()`. */
  ndims() {
    return this.#ndims;
  }

  /** Serializable configuration for this function: just the model name. */
  toJSON() {
    return {
      name: this.name,
    };
  }

  /** Embeds each text with the loaded pipeline and returns nested arrays. */
  async generateEmbeddings(texts: string[]) {
    const output = await this.extractor(texts, {
      pooling: "mean",
      normalize: true,
    });
    return output.tolist();
  }
}
// --8<-- [end:embedding_impl]
// Runs the documented "call custom function" snippet end to end against a
// temporary database directory supplied by withTempDirectory.
test("Registry examples", async () => {
await withTempDirectory(async (databaseDir) => {
// --8<-- [start:call_custom_function]
// Look up the embedding function registered as "sentence-transformers"
// and create an instance of it.
const registry = getRegistry();
const sentenceTransformer = await registry
.get<SentenceTransformersEmbeddings>("sentence-transformers")!
.create();
// Build a schema whose vector and source fields come from that function.
const schema = LanceSchema({
vector: sentenceTransformer.vectorField(),
text: sentenceTransformer.sourceField(),
});
const db = await lancedb.connect(databaseDir);
const table = await db.createEmptyTable("table", schema, {
mode: "overwrite",
});
// Only `text` is supplied, both when adding rows and when searching.
await table.add([{ text: "hello" }, { text: "world" }]);
const results = await table.search("greeting").limit(1).toArray();
// --8<-- [end:call_custom_function]
expect(results.length).toBe(1);
});
// Third argument: Jest timeout for this test, in milliseconds.
}, 100_000);

View File

@@ -1,64 +0,0 @@
// --8<-- [start:imports]
import * as lancedb from "@lancedb/lancedb";
import {
LanceSchema,
TextEmbeddingFunction,
getRegistry,
register,
} from "@lancedb/lancedb/embedding";
import { pipeline } from "@xenova/transformers";
// --8<-- [end:imports]
// --8<-- [start:embedding_impl]
@register("sentence-transformers")
class SentenceTransformersEmbeddings extends TextEmbeddingFunction {
name = "Xenova/all-miniLM-L6-v2";
#ndims!: number;
extractor: any;
async init() {
this.extractor = await pipeline("feature-extraction", this.name);
this.#ndims = await this.generateEmbeddings(["hello"]).then(
(e) => e[0].length,
);
}
ndims() {
return this.#ndims;
}
toJSON() {
return {
name: this.name,
};
}
async generateEmbeddings(texts: string[]) {
const output = await this.extractor(texts, {
pooling: "mean",
normalize: true,
});
return output.tolist();
}
}
// -8<-- [end:embedding_impl]
// --8<-- [start:call_custom_function]
const registry = getRegistry();
const sentenceTransformer = await registry
.get<SentenceTransformersEmbeddings>("sentence-transformers")!
.create();
const schema = LanceSchema({
vector: sentenceTransformer.vectorField(),
text: sentenceTransformer.sourceField(),
});
const db = await lancedb.connect("/tmp/db");
const table = await db.createEmptyTable("table", schema, { mode: "overwrite" });
await table.add([{ text: "hello" }, { text: "world" }]);
const results = await table.search("greeting").limit(1).toArray();
console.log(results[0].text);
// -8<-- [end:call_custom_function]

View File

@@ -0,0 +1,96 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { expect, test } from "@jest/globals";
// --8<-- [start:imports]
import * as lancedb from "@lancedb/lancedb";
import "@lancedb/lancedb/embedding/openai";
import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
import { type Float, Float32, Utf8 } from "apache-arrow";
// --8<-- [end:imports]
import { withTempDirectory } from "./util.ts";
// Run the OpenAI example only when an API key is present in the environment;
// otherwise register it as a skipped test.
const openAiTest = process.env.OPENAI_API_KEY != null ? test : test.skip;

openAiTest("openai embeddings", async () => {
  // Runs the documented OpenAI embeddings snippet against a temporary
  // database and checks that the top hit carries the `text` column.
  await withTempDirectory(async (databaseDir) => {
    // --8<-- [start:openai_embeddings]
    const db = await lancedb.connect(databaseDir);
    const func = getRegistry()
      .get("openai")
      ?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;

    const wordsSchema = LanceSchema({
      text: func.sourceField(new Utf8()),
      vector: func.vectorField(),
    });
    const tbl = await db.createEmptyTable("words", wordsSchema, {
      mode: "overwrite",
    });
    await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);

    const query = "greetings";
    const hits = await tbl.search(query).limit(1).toArray();
    const actual = hits[0];
    // --8<-- [end:openai_embeddings]
    expect(actual).toHaveProperty("text");
  });
});
test("custom embedding function", async () => {
  // Runs the documented custom embedding function snippet against a
  // temporary database and checks that both ways of attaching the function
  // produce a table containing the two input rows.
  await withTempDirectory(async (databaseDir) => {
    // --8<-- [start:embedding_function]
    const db = await lancedb.connect(databaseDir);

    @register("my_embedding")
    class MyEmbeddingFunction extends EmbeddingFunction<string> {
      toJSON(): object {
        return {};
      }
      ndims() {
        return 3;
      }
      embeddingDataType(): Float {
        return new Float32();
      }
      async computeQueryEmbeddings(_data: string) {
        // This is a placeholder for a real embedding function
        return [1, 2, 3];
      }
      async computeSourceEmbeddings(data: string[]) {
        // This is a placeholder for a real embedding function.
        // map() gives every row its own array (fill() would share a single
        // instance across rows) and needs no type assertion.
        return data.map(() => [1, 2, 3]);
      }
    }

    const func = new MyEmbeddingFunction();

    const data = [{ text: "pepperoni" }, { text: "pineapple" }];

    // Option 1: manually specify the embedding function
    const table = await db.createTable("vectors", data, {
      embeddingFunction: {
        function: func,
        sourceColumn: "text",
        vectorColumn: "vector",
      },
      mode: "overwrite",
    });

    // Option 2: provide the embedding function through a schema
    const schema = LanceSchema({
      text: func.sourceField(new Utf8()),
      vector: func.vectorField(),
    });

    const table2 = await db.createTable("vectors2", data, {
      schema,
      mode: "overwrite",
    });
    // --8<-- [end:embedding_function]
    expect(await table.countRows()).toBe(2);
    expect(await table2.countRows()).toBe(2);
  });
});

View File

@@ -1,83 +0,0 @@
// --8<-- [start:imports]
import * as lancedb from "@lancedb/lancedb";
import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
import { type Float, Float32, Utf8 } from "apache-arrow";
// --8<-- [end:imports]
{
  // --8<-- [start:openai_embeddings]
  const db = await lancedb.connect("/tmp/db");
  const func = getRegistry()
    .get("openai")
    ?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;

  const wordsSchema = LanceSchema({
    text: func.sourceField(new Utf8()),
    vector: func.vectorField(),
  });
  const tbl = await db.createEmptyTable("words", wordsSchema, {
    mode: "overwrite",
  });
  await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);

  const query = "greetings";
  // search() returns a query builder, not a promise, so only the final
  // toArray() call needs to be awaited.
  const actual = (await tbl.search(query).limit(1).toArray())[0];
  // --8<-- [end:openai_embeddings]
  console.log("result = ", actual.text);
}
{
// --8<-- [start:embedding_function]
const db = await lancedb.connect("/tmp/db");
@register("my_embedding")
class MyEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {};
}
ndims() {
return 3;
}
embeddingDataType(): Float {
return new Float32();
}
async computeQueryEmbeddings(_data: string) {
// This is a placeholder for a real embedding function
return [1, 2, 3];
}
async computeSourceEmbeddings(data: string[]) {
// This is a placeholder for a real embedding function
return Array.from({ length: data.length }).fill([1, 2, 3]) as number[][];
}
}
const func = new MyEmbeddingFunction();
const data = [{ text: "pepperoni" }, { text: "pineapple" }];
// Option 1: manually specify the embedding function
const table = await db.createTable("vectors", data, {
embeddingFunction: {
function: func,
sourceColumn: "text",
vectorColumn: "vector",
},
mode: "overwrite",
});
// Option 2: provide the embedding function through a schema
const schema = LanceSchema({
text: func.sourceField(new Utf8()),
vector: func.vectorField(),
});
const table2 = await db.createTable("vectors2", data, {
schema,
mode: "overwrite",
});
// --8<-- [end:embedding_function]
}

View File

@@ -0,0 +1,42 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { expect, test } from "@jest/globals";
import * as lancedb from "@lancedb/lancedb";
import { withTempDirectory } from "./util.ts";
test("filtering examples", async () => {
  // Runs the filtering documentation snippets against a temporary database.
  // The `--8<--` markers delimit the regions embedded in the docs.
  await withTempDirectory(async (databaseDir) => {
    const db = await lancedb.connect(databaseDir);

    // Row `rowId` carries a 1536-element vector filled with its own id.
    const rows = Array.from({ length: 10_000 }, (_, rowId) => ({
      vector: Array(1536).fill(rowId),
      id: rowId,
      item: `item ${rowId}`,
      strId: `${rowId}`,
    }));
    const tbl = await db.createTable("myVectors", rows, { mode: "overwrite" });

    // --8<-- [start:search]
    const _result = await tbl
      .search(Array(1536).fill(0.5))
      .limit(1)
      .where("id = 10")
      .toArray();
    // --8<-- [end:search]

    // --8<-- [start:vec_search]
    const result = await (
      tbl.search(Array(1536).fill(0)) as lancedb.VectorQuery
    )
      .where("(item IN ('item 0', 'item 2')) AND (id > 10)")
      .postfilter()
      .toArray();
    // --8<-- [end:vec_search]
    expect(result).toHaveLength(0);

    // --8<-- [start:sql_search]
    await tbl.query().where("id = 10").limit(10).toArray();
    // --8<-- [end:sql_search]
  });
});

View File

@@ -1,34 +0,0 @@
import * as lancedb from "@lancedb/lancedb";
const db = await lancedb.connect("data/sample-lancedb");
const data = Array.from({ length: 10_000 }, (_, i) => ({
vector: Array(1536).fill(i),
id: i,
item: `item ${i}`,
strId: `${i}`,
}));
const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
// --8<-- [start:search]
const _result = await tbl
.search(Array(1536).fill(0.5))
.limit(1)
.where("id = 10")
.toArray();
// --8<-- [end:search]
// --8<-- [start:vec_search]
await tbl
.search(Array(1536).fill(0))
.where("(item IN ('item 0', 'item 2')) AND (id > 10)")
.postfilter()
.toArray();
// --8<-- [end:vec_search]
// --8<-- [start:sql_search]
await tbl.query().where("id = 10").limit(10).toArray();
// --8<-- [end:sql_search]
console.log("SQL search: done");

View File

@@ -0,0 +1,45 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { expect, test } from "@jest/globals";
import * as lancedb from "@lancedb/lancedb";
import { withTempDirectory } from "./util.ts";
// Example test for full-text search: seeds a table whose `doc` column cycles
// through a small word list, builds an FTS index on it, and queries for one
// of the words.
test("full text search", async () => {
  await withTempDirectory(async (databaseDir) => {
    const db = await lancedb.connect(databaseDir);
    const words = [
      "apple",
      "banana",
      "cherry",
      "date",
      "elderberry",
      "fig",
      "grape",
    ];
    // 10,000 rows; `doc` repeats the word list, so every word occurs in far
    // more than 10 rows.
    const data = Array.from({ length: 10_000 }, (_, i) => ({
      vector: Array(1536).fill(i),
      id: i,
      item: `item ${i}`,
      strId: `${i}`,
      doc: words[i % words.length],
    }));
    const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
    await tbl.createIndex("doc", {
      config: lancedb.Index.fts(),
    });

    // --8<-- [start:full_text_search]
    const result = await tbl
      .query()
      .nearestToText("apple")
      .select(["id", "doc"])
      .limit(10)
      .toArray();
    // --8<-- [end:full_text_search]
    // The assertion lives outside the snippet markers (as in the sibling
    // example tests) so test-only code is not part of the snippet region.
    expect(result.length).toBe(10);
  });
});

View File

@@ -1,52 +0,0 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import * as lancedb from "@lancedb/lancedb";
// Example script for full-text search: seeds a sample table whose `doc`
// column cycles through a small word list, builds an FTS index on it, and
// searches for one of the words.
const db = await lancedb.connect("data/sample-lancedb");

const words = [
  "apple",
  "banana",
  "cherry",
  "date",
  "elderberry",
  "fig",
  "grape",
];

const data = Array.from({ length: 10_000 }, (_, i) => ({
  vector: Array(1536).fill(i),
  id: i,
  item: `item ${i}`,
  strId: `${i}`,
  doc: words[i % words.length],
}));

const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
await tbl.createIndex("doc", {
  config: lancedb.Index.fts(),
});

// --8<-- [start:full_text_search]
// `result` is never reassigned, so it is declared with `const`.
const result = await tbl
  .search("apple")
  .select(["id", "doc"])
  .limit(10)
  .toArray();
console.log(result);
// --8<-- [end:full_text_search]

// Completion message names this example (it previously said "SQL search").
console.log("full text search: done");

View File

@@ -0,0 +1,6 @@
// Jest configuration for the example tests.
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
// Use the ts-jest preset so the TypeScript test files can be run by Jest.
preset: "ts-jest",
// Run tests in Jest's Node environment.
testEnvironment: "node",
// Do not collect tests from paths matching "./dist".
testPathIgnorePatterns: ["./dist"],
};

View File

@@ -1,27 +0,0 @@
{
"compilerOptions": {
// Enable latest features
"lib": ["ESNext", "DOM"],
"target": "ESNext",
"module": "ESNext",
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
// Bundler mode
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"noEmit": true,
// Best practices
"strict": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
// Some stricter flags (disabled by default)
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -5,24 +5,29 @@
"main": "index.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
"//1": "--experimental-vm-modules is needed to run jest with sentence-transformers",
"//2": "--testEnvironment is needed to run jest with sentence-transformers",
"//3": "See: https://github.com/huggingface/transformers.js/issues/57",
"test": "node --experimental-vm-modules node_modules/.bin/jest --testEnvironment jest-environment-node-single-context --verbose",
"lint": "biome check *.ts && biome format *.ts",
"lint-ci": "biome ci .",
"lint-fix": "biome check --write *.ts && npm run format",
"format": "biome format --write *.ts"
},
"author": "Lance Devs",
"license": "Apache-2.0",
"dependencies": {
"@lancedb/lancedb": "file:../",
"@xenova/transformers": "^2.17.2"
"@huggingface/transformers": "^3.0.2",
"@lancedb/lancedb": "file:../dist",
"openai": "^4.29.2",
"sharp": "^0.33.5"
},
"devDependencies": {
"@biomejs/biome": "^1.7.3",
"@jest/globals": "^29.7.0",
"jest": "^29.7.0",
"jest-environment-node-single-context": "^29.4.0",
"ts-jest": "^29.2.5",
"typescript": "^5.5.4"
},
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "Node",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true
}
}

View File

@@ -0,0 +1,42 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { expect, test } from "@jest/globals";
// --8<-- [start:import]
import * as lancedb from "@lancedb/lancedb";
// --8<-- [end:import]
import { withTempDirectory } from "./util.ts";
// Example test for vector search. The title says "vector search" (it was
// previously "full text search", duplicating the title of the real full-text
// search example even though only vector searches are run here).
test("vector search", async () => {
  await withTempDirectory(async (databaseDir) => {
    // Seed the table inside its own block scope so the `db` declared in the
    // `search1` snippet below does not clash with this one.
    {
      const db = await lancedb.connect(databaseDir);
      const data = Array.from({ length: 10_000 }, (_, i) => ({
        vector: Array(128).fill(i),
        id: `${i}`,
        content: "",
        longId: `${i}`,
      }));
      await db.createTable("my_vectors", data);
    }

    // --8<-- [start:search1]
    const db = await lancedb.connect(databaseDir);
    const tbl = await db.openTable("my_vectors");
    const results1 = await tbl.search(Array(128).fill(1.2)).limit(10).toArray();
    // --8<-- [end:search1]
    expect(results1.length).toBe(10);

    // --8<-- [start:search2]
    const results2 = await (
      tbl.search(Array(128).fill(1.2)) as lancedb.VectorQuery
    )
      .distanceType("cosine")
      .limit(10)
      .toArray();
    // --8<-- [end:search2]
    expect(results2.length).toBe(10);
  });
});

View File

@@ -1,38 +0,0 @@
// --8<-- [end:import]
import * as fs from "node:fs";
// --8<-- [start:import]
import * as lancedb from "@lancedb/lancedb";
// Recreates the sample database from scratch: removes any existing
// "data/sample-lancedb" directory, then creates the "my_vectors" table with
// 10,000 rows whose 1536-wide vectors are filled with the row index.
async function setup() {
  const dbPath = "data/sample-lancedb";
  // `force: true` makes the removal a no-op when the directory is absent.
  fs.rmSync(dbPath, { recursive: true, force: true });

  const database = await lancedb.connect(dbPath);
  const rows = [...Array(10_000).keys()].map((i) => {
    const idText = `${i}`;
    return {
      vector: Array(1536).fill(i),
      id: idText,
      content: "",
      longId: idText,
    };
  });
  await database.createTable("my_vectors", rows);
}
// Build the sample table before running the search snippets below.
await setup();
// --8<-- [start:search1]
const db = await lancedb.connect("data/sample-lancedb");
const tbl = await db.openTable("my_vectors");
const _results1 = await tbl.search(Array(1536).fill(1.2)).limit(10).toArray();
// --8<-- [end:search1]
// --8<-- [start:search2]
const _results2 = await tbl
.search(Array(1536).fill(1.2))
.distanceType("cosine")
.limit(10)
.toArray();
console.log(_results2);
// --8<-- [end:search2]
// Signal that the script ran to completion.
console.log("search: done");

View File

@@ -1,50 +0,0 @@
import * as lancedb from "@lancedb/lancedb";
import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
import { Utf8 } from "apache-arrow";
// Example script: embeds a list of short facts with the "huggingface"
// embedding function from the registry, stores them in a table, and prints
// the fact closest to a natural-language question.
const db = await lancedb.connect("/tmp/db");
const registry = getRegistry();
const func = await registry.get("huggingface").create();

const factTexts = [
  "Albert Einstein was a theoretical physicist.",
  "The capital of France is Paris.",
  "The Great Wall of China is one of the Seven Wonders of the World.",
  "Python is a popular programming language.",
  "Mount Everest is the highest mountain in the world.",
  "Leonardo da Vinci painted the Mona Lisa.",
  "Shakespeare wrote Hamlet.",
  "The human body has 206 bones.",
  "The speed of light is approximately 299,792 kilometers per second.",
  "Water boils at 100 degrees Celsius.",
  "The Earth orbits the Sun.",
  "The Pyramids of Giza are located in Egypt.",
  "Coffee is one of the most popular beverages in the world.",
  "Tokyo is the capital city of Japan.",
  "Photosynthesis is the process by which plants make their food.",
  "The Pacific Ocean is the largest ocean on Earth.",
  "Mozart was a prolific composer of classical music.",
  "The Internet is a global network of computers.",
  "Basketball is a sport played with a ball and a hoop.",
  "The first computer virus was created in 1983.",
  "Artificial neural networks are inspired by the human brain.",
  "Deep learning is a subset of machine learning.",
  "IBM's Watson won Jeopardy! in 2011.",
  "The first computer programmer was Ada Lovelace.",
  "The first chatbot was ELIZA, created in the 1960s.",
];
// One row per fact; only the `text` column is supplied here.
const facts = factTexts.map((text) => ({ text }));

// `text` is declared as the embedding source and `vector` as its target.
const factsSchema = LanceSchema({
  text: func.sourceField(new Utf8()),
  vector: func.vectorField(),
});

const tbl = await db.createTable("facts", facts, {
  mode: "overwrite",
  schema: factsSchema,
});

const query = "How many bones are in the human body?";
const [best] = await tbl.search(query).limit(1).toArray();
console.log("Answer: ", best["text"]);

View File

@@ -0,0 +1,59 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { expect, test } from "@jest/globals";
import { withTempDirectory } from "./util.ts";
import * as lancedb from "@lancedb/lancedb";
import "@lancedb/lancedb/embedding/transformers";
import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
import { Utf8 } from "apache-arrow";
// Example test for embedding-based search: embeds a list of short facts with
// the "huggingface" embedding function from the registry and checks that a
// natural-language question retrieves the matching fact. The title says
// what is tested (it was previously "full text search", duplicating the
// title of the real full-text search example).
test("sentence embeddings search", async () => {
  await withTempDirectory(async (databaseDir) => {
    const db = await lancedb.connect(databaseDir);
    const func = await getRegistry().get("huggingface").create();

    const facts = [
      "Albert Einstein was a theoretical physicist.",
      "The capital of France is Paris.",
      "The Great Wall of China is one of the Seven Wonders of the World.",
      "Python is a popular programming language.",
      "Mount Everest is the highest mountain in the world.",
      "Leonardo da Vinci painted the Mona Lisa.",
      "Shakespeare wrote Hamlet.",
      "The human body has 206 bones.",
      "The speed of light is approximately 299,792 kilometers per second.",
      "Water boils at 100 degrees Celsius.",
      "The Earth orbits the Sun.",
      "The Pyramids of Giza are located in Egypt.",
      "Coffee is one of the most popular beverages in the world.",
      "Tokyo is the capital city of Japan.",
      "Photosynthesis is the process by which plants make their food.",
      "The Pacific Ocean is the largest ocean on Earth.",
      "Mozart was a prolific composer of classical music.",
      "The Internet is a global network of computers.",
      "Basketball is a sport played with a ball and a hoop.",
      "The first computer virus was created in 1983.",
      "Artificial neural networks are inspired by the human brain.",
      "Deep learning is a subset of machine learning.",
      "IBM's Watson won Jeopardy! in 2011.",
      "The first computer programmer was Ada Lovelace.",
      "The first chatbot was ELIZA, created in the 1960s.",
    ].map((text) => ({ text }));

    // `text` is declared as the embedding source and `vector` as its target.
    const factsSchema = LanceSchema({
      text: func.sourceField(new Utf8()),
      vector: func.vectorField(),
    });
    const tbl = await db.createTable("facts", facts, {
      mode: "overwrite",
      schema: factsSchema,
    });

    const query = "How many bones are in the human body?";
    const actual = await tbl.search(query).limit(1).toArray();
    expect(actual[0]["text"]).toBe("The human body has 206 bones.");
  });
});

View File

@@ -0,0 +1,17 @@
{
"include": ["*.test.ts"],
"compilerOptions": {
"target": "es2022",
"module": "NodeNext",
"declaration": true,
"outDir": "./dist",
"strict": true,
"allowJs": true,
"resolveJsonModule": true,
"emitDecoratorMetadata": true,
"experimentalDecorators": true,
"moduleResolution": "NodeNext",
"allowImportingTsExtensions": true,
"emitDeclarationOnly": true
}
}

16
nodejs/examples/util.ts Normal file
View File

@@ -0,0 +1,16 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as fs from "fs";
import { tmpdir } from "os";
import * as path from "path";
/**
 * Runs `fn` with the path of a freshly created temporary directory and
 * removes that directory afterwards, whether `fn` resolves or rejects.
 *
 * The directory is created under the OS temp directory with the prefix
 * "temp-dir-".
 *
 * @param fn - async callback that receives the temporary directory path
 * @returns a promise that settles with the outcome of `fn` after cleanup
 */
export async function withTempDirectory(
  fn: (tempDir: string) => Promise<void>,
): Promise<void> {
  const tmpDirPath = fs.mkdtempSync(path.join(tmpdir(), "temp-dir-"));
  try {
    await fn(tmpDirPath);
  } finally {
    // `force: true` makes cleanup a no-op when the callback already removed
    // the directory. Without it, rmSync throws ENOENT from this `finally`,
    // which would fail a successful run or replace the callback's own error.
    fs.rmSync(tmpDirPath, { recursive: true, force: true });
  }
}

View File

@@ -4,4 +4,5 @@ module.exports = {
testEnvironment: "node",
moduleDirectories: ["node_modules", "./dist"],
moduleFileExtensions: ["js", "ts"],
modulePathIgnorePatterns: ["<rootDir>/examples/"],
};

View File

@@ -47,8 +47,8 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
string,
Partial<XenovaTransformerOptions>
> {
#model?: import("@xenova/transformers").PreTrainedModel;
#tokenizer?: import("@xenova/transformers").PreTrainedTokenizer;
#model?: import("@huggingface/transformers").PreTrainedModel;
#tokenizer?: import("@huggingface/transformers").PreTrainedTokenizer;
#modelName: XenovaTransformerOptions["model"];
#initialized = false;
#tokenizerOptions: XenovaTransformerOptions["tokenizerOptions"];
@@ -92,18 +92,19 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
try {
// SAFETY:
// since typescript transpiles `import` to `require`, we need to do this in an unsafe way
// We can't use `require` because `@xenova/transformers` is an ESM module
// We can't use `require` because `@huggingface/transformers` is an ESM module
// and we can't use `import` directly because typescript will transpile it to `require`.
// and we want to remain compatible with both ESM and CJS modules
// so we use `eval` to bypass typescript for this specific import.
transformers = await eval('import("@xenova/transformers")');
transformers = await eval('import("@huggingface/transformers")');
} catch (e) {
throw new Error(`error loading @xenova/transformers\nReason: ${e}`);
throw new Error(`error loading @huggingface/transformers\nReason: ${e}`);
}
try {
this.#model = await transformers.AutoModel.from_pretrained(
this.#modelName,
{ dtype: "fp32" },
);
} catch (e) {
throw new Error(
@@ -128,7 +129,8 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
} else {
const config = this.#model!.config;
const ndims = config["hidden_size"];
// biome-ignore lint/style/useNamingConvention: we don't control this name.
const ndims = (config as unknown as { hidden_size: number }).hidden_size;
if (!ndims) {
throw new Error(
"hidden_size not found in model config, you may need to manually specify the embedding dimensions. ",
@@ -183,7 +185,7 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
}
const tensorDiv = (
src: import("@xenova/transformers").Tensor,
src: import("@huggingface/transformers").Tensor,
divBy: number,
) => {
for (let i = 0; i < src.data.length; ++i) {

View File

@@ -571,4 +571,9 @@ export class Query extends QueryBase<NativeQuery> {
return new VectorQuery(vectorQuery);
}
}
/**
 * Adds a full-text search for `query` to this query.
 *
 * Forwards `query` and the optional `columns` to the underlying query's
 * `fullTextSearch` via `doCall`.
 *
 * @param query - the text to search for
 * @param columns - optional column names, passed through unchanged.
 *   NOTE(review): presumably the columns to search in; confirm against
 *   the native `fullTextSearch` binding.
 * @returns this same `Query` instance, so calls can be chained
 */
nearestToText(query: string, columns?: string[]): Query {
this.doCall((inner) => inner.fullTextSearch(query, columns));
return this;
}
}

1432
nodejs/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -85,7 +85,7 @@
"reflect-metadata": "^0.2.2"
},
"optionalDependencies": {
"@xenova/transformers": ">=2.17 < 3",
"@huggingface/transformers": "^3.0.2",
"openai": "^4.29.2"
},
"peerDependencies": {

View File

@@ -12,7 +12,7 @@
"experimentalDecorators": true,
"moduleResolution": "Node"
},
"exclude": ["./dist/*"],
"exclude": ["./dist/*", "./examples/*"],
"typedocOptions": {
"entryPoints": ["lancedb/index.ts"],
"out": "../docs/src/javascript/",