mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-19 21:10:41 +00:00
Added transformersJS example to docs and node/examples (#297)
This commit is contained in:
@@ -68,6 +68,7 @@ nav:
|
||||
- Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
|
||||
- Javascript examples:
|
||||
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
|
||||
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
|
||||
- References:
|
||||
- Vector Search: search.md
|
||||
- SQL filters: sql.md
|
||||
|
||||
117
docs/src/examples/transformerjs_embedding_search_nodejs.md
Normal file
117
docs/src/examples/transformerjs_embedding_search_nodejs.md
Normal file
@@ -0,0 +1,117 @@
|
||||
# Vector embedding search using TransformersJS and NodeJS
|
||||
|
||||
This example shows how to use the [transformers.js](https://github.com/xenova/transformers.js) library to perform vector embedding search using LanceDB's Javascript API.
|
||||
|
||||
|
||||
### Setting up
|
||||
First, install the dependencies:
|
||||
```bash
|
||||
npm install vectordb
|
||||
npm i @xenova/transformers
|
||||
```
|
||||
|
||||
We will also be using the [all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) model to make it compatible with Transformers.js
|
||||
|
||||
Within our `index.js` file we will import the necessary libraries and define our model and database:
|
||||
|
||||
```javascript
|
||||
const lancedb = require('vectordb')
|
||||
const { pipeline } = await import('@xenova/transformers')
|
||||
const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
||||
```
|
||||
|
||||
### Creating the embedding function
|
||||
|
||||
Next, we will create a function that will take in a string and return the vector embedding of that string. We will use the `pipe` function we defined earlier to get the vector embedding of the string.
|
||||
|
||||
```javascript
|
||||
// Define the function. `sourceColumn` is required for LanceDB to know
|
||||
// which column to use as input.
|
||||
const embed_fun = {}
|
||||
embed_fun.sourceColumn = 'text'
|
||||
embed_fun.embed = async function (batch) {
|
||||
let result = []
|
||||
// Given a batch of strings, we will use the `pipe` function to get
|
||||
// the vector embedding of each string.
|
||||
for (let text of batch) {
|
||||
// 'mean' pooling and normalizing allows the embeddings to share the
|
||||
// same length.
|
||||
const res = await pipe(text, { pooling: 'mean', normalize: true })
|
||||
result.push(Array.from(res['data']))
|
||||
}
|
||||
return (result)
|
||||
}
|
||||
```
|
||||
|
||||
### Creating the database
|
||||
|
||||
Now, we will create the LanceDB database and add the embedding function we defined earlier.
|
||||
|
||||
```javascript
|
||||
// Link a folder and create a table with data
|
||||
const db = await lancedb.connect('data/sample-lancedb')
|
||||
|
||||
// You can also import any other data, but make sure that you have a column
|
||||
// for the embedding function to use.
|
||||
const data = [
|
||||
{ id: 1, text: 'Cherry', type: 'fruit' },
|
||||
{ id: 2, text: 'Carrot', type: 'vegetable' },
|
||||
{ id: 3, text: 'Potato', type: 'vegetable' },
|
||||
{ id: 4, text: 'Apple', type: 'fruit' },
|
||||
{ id: 5, text: 'Banana', type: 'fruit' }
|
||||
]
|
||||
|
||||
// Create the table with the embedding function
|
||||
const table = await db.createTable('food_table', data, "create", embed_fun)
|
||||
```
|
||||
|
||||
### Performing the search
|
||||
|
||||
Now, we can perform the search using the `search` function. LanceDB automatically uses the embedding function we defined earlier to get the vector embedding of the query string.
|
||||
|
||||
```javascript
|
||||
// Query the table
|
||||
const results = await table
|
||||
.search("a sweet fruit to eat")
|
||||
.metricType("cosine")
|
||||
.limit(2)
|
||||
.execute()
|
||||
console.log(results.map(r => r.text))
|
||||
```
|
||||
```bash
|
||||
[ 'Banana', 'Cherry' ]
|
||||
```
|
||||
|
||||
Output of `results`:
|
||||
```bash
|
||||
[
|
||||
{
|
||||
vector: Float32Array(384) [
|
||||
-0.057455405592918396,
|
||||
0.03617725893855095,
|
||||
-0.0367760956287384,
|
||||
... 381 more items
|
||||
],
|
||||
id: 5,
|
||||
text: 'Banana',
|
||||
type: 'fruit',
|
||||
score: 0.4919965863227844
|
||||
},
|
||||
{
|
||||
vector: Float32Array(384) [
|
||||
0.0009714411571621895,
|
||||
0.008223623037338257,
|
||||
0.009571489877998829,
|
||||
... 381 more items
|
||||
],
|
||||
id: 1,
|
||||
text: 'Cherry',
|
||||
type: 'fruit',
|
||||
score: 0.5540297031402588
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Wrapping it up
|
||||
|
||||
In this example, we showed how to use the `transformers.js` library to perform vector embedding search using LanceDB's Javascript API. You can find the full code for this example on [Github](https://github.com/lancedb/lancedb/blob/main/node/examples/js-transformers/index.js)!
|
||||
66
node/examples/js-transformers/index.js
Normal file
66
node/examples/js-transformers/index.js
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright 2023 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
'use strict'
|
||||
|
||||
|
||||
async function example() {
|
||||
|
||||
const lancedb = require('vectordb')
|
||||
|
||||
// Import transformers and the all-MiniLM-L6-v2 model (https://huggingface.co/Xenova/all-MiniLM-L6-v2)
|
||||
const { pipeline } = await import('@xenova/transformers')
|
||||
const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
||||
|
||||
|
||||
// Create embedding function from pipeline which returns a list of vectors from batch
|
||||
// sourceColumn is the name of the column in the data to be embedded
|
||||
//
|
||||
// Output of pipe is a Tensor { data: Float32Array(384) }, so filter for the vector
|
||||
const embed_fun = {}
|
||||
embed_fun.sourceColumn = 'text'
|
||||
embed_fun.embed = async function (batch) {
|
||||
let result = []
|
||||
for (let text of batch) {
|
||||
const res = await pipe(text, { pooling: 'mean', normalize: true })
|
||||
result.push(Array.from(res['data']))
|
||||
}
|
||||
return (result)
|
||||
}
|
||||
|
||||
// Link a folder and create a table with data
|
||||
const db = await lancedb.connect('data/sample-lancedb')
|
||||
|
||||
const data = [
|
||||
{ id: 1, text: 'Cherry', type: 'fruit' },
|
||||
{ id: 2, text: 'Carrot', type: 'vegetable' },
|
||||
{ id: 3, text: 'Potato', type: 'vegetable' },
|
||||
{ id: 4, text: 'Apple', type: 'fruit' },
|
||||
{ id: 5, text: 'Banana', type: 'fruit' }
|
||||
]
|
||||
|
||||
const table = await db.createTable('food_table', data, "create", embed_fun)
|
||||
|
||||
|
||||
// Query the table
|
||||
const results = await table
|
||||
.search("a sweet fruit to eat")
|
||||
.metricType("cosine")
|
||||
.limit(2)
|
||||
.execute()
|
||||
console.log(results.map(r => r.text))
|
||||
|
||||
}
|
||||
|
||||
example().then(_ => { console.log("Done!") })
|
||||
16
node/examples/js-transformers/package.json
Normal file
16
node/examples/js-transformers/package.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"name": "vectordb-example-js-transformers",
|
||||
"version": "1.0.0",
|
||||
"description": "Example for using transformers.js with lancedb",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "Lance Devs",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@xenova/transformers": "^2.4.1",
|
||||
"vectordb": "^0.1.12"
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user