Merge branch 'tecmie/embeddings-openai' of github.com:tecmie/lancedb into tecmie-tecmie/embeddings-openai

This commit is contained in:
Chang She
2024-01-19 16:43:53 -08:00
committed by Weston Pace
4 changed files with 31 additions and 4851 deletions

View File

@@ -13,36 +13,44 @@
// limitations under the License.
import { type EmbeddingFunction } from '../index'
import type OpenAI from 'openai'
export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
private readonly _openai: any
private readonly _openai: OpenAI
private readonly _modelName: string
/**
 * Builds an embedding function that delegates to the OpenAI client.
 *
 * @param sourceColumn - Stored on the instance as `sourceColumn`.
 * @param openAIKey - Passed to the OpenAI client as its `apiKey` option.
 * @param modelName - Model name sent with every embedding request.
 * @throws Error when the `openai` package cannot be loaded via `require`.
 */
constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
  // Only the type is imported at file level; the package itself is loaded
  // here so a missing install surfaces as the hint below.
  let OpenAIClient: typeof OpenAI
  try {
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    OpenAIClient = require('openai')
  } catch {
    throw new Error('please install openai@^4.24.1 using npm install openai')
  }

  this.sourceColumn = sourceColumn
  this._openai = new OpenAIClient({ apiKey: openAIKey })
  this._modelName = modelName
}
/**
 * Requests embeddings for a batch of strings from the configured model.
 *
 * @param data - Strings sent as the `input` of a single embeddings request.
 * @returns One embedding vector per entry of the response's `data` array,
 *   in the order the response lists them.
 */
async embed (data: string[]): Promise<number[][]> {
  const response = await this._openai.embeddings.create({
    model: this._modelName,
    input: data
  })
  // The v4 client returns the payload directly, so entries live at
  // `response.data` rather than the v3 `response.data.data`.
  return response.data.map((item) => item.embedding)
}

View File

@@ -19,33 +19,31 @@ import { OpenAIEmbeddingFunction } from '../../embedding/openai'
import { isEmbeddingFunction } from '../../embedding/embedding_function'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { OpenAIApi } = require('openai')
const OpenAIApi = require('openai')
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { stub } = require('sinon')
describe('OpenAPIEmbeddings', function () {
const stubValue = {
data: {
data: [
{
embedding: Array(1536).fill(1.0)
},
{
embedding: Array(1536).fill(2.0)
}
]
}
data: [
{
embedding: Array(1536).fill(1.0)
},
{
embedding: Array(1536).fill(2.0)
}
]
}
describe('#embed', function () {
  it('should create vector embeddings', async function () {
    // Stub the v4 client's embeddings endpoint so no network call is made.
    const openAIStub = stub(OpenAIApi.Embeddings.prototype, 'create').resolves(stubValue)
    try {
      const f = new OpenAIEmbeddingFunction('text', 'sk-key')
      const vectors = await f.embed(['abc', 'def'])
      assert.isTrue(openAIStub.calledOnce)
      assert.equal(vectors.length, 2)
      assert.deepEqual(vectors[0], stubValue.data[0].embedding)
      assert.deepEqual(vectors[1], stubValue.data[1].embedding)
    } finally {
      // Always put the real prototype method back so the stub cannot leak
      // into other tests, even when an assertion above fails.
      openAIStub.restore()
    }
  })
})