add openai embedding function to nodejs client (#107)

- openai is an optional dependency for lancedb
- added an example to show how to use it
This commit is contained in:
gsilvestrin
2023-06-01 10:25:00 -07:00
committed by GitHub
parent 99cbda8b07
commit 3e14b357e7
12 changed files with 683 additions and 38 deletions

View File

@@ -0,0 +1,28 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* An embedding function that automatically creates vector representation for a given column.
*/
export interface EmbeddingFunction<T> {
/**
* The name of the column that will be used as input for the Embedding Function.
*/
sourceColumn: string
/**
* Creates a vector representation for the given values.
*/
embed: (data: T[]) => Promise<number[][]>
}

View File

@@ -0,0 +1,51 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { type EmbeddingFunction } from '../index'
export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
private readonly _openai: any
private readonly _modelName: string
constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
let openai
try {
// eslint-disable-next-line @typescript-eslint/no-var-requires
openai = require('openai')
} catch {
throw new Error('please install openai using npm install openai')
}
this.sourceColumn = sourceColumn
const configuration = new openai.Configuration({
apiKey: openAIKey
})
this._openai = new openai.OpenAIApi(configuration)
this._modelName = modelName
}
async embed (data: string[]): Promise<number[][]> {
const response = await this._openai.createEmbedding({
model: this._modelName,
input: data
})
const embeddings: number[][] = []
for (let i = 0; i < response.data.data.length; i++) {
embeddings.push(response.data.data[i].embedding as number[])
}
return embeddings
}
sourceColumn: string
}