mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-06 20:02:58 +00:00
Merge branch 'tecmie/embeddings-openai' of github.com:tecmie/lancedb into tecmie-tecmie/embeddings-openai
This commit is contained in:
4826
node/package-lock.json
generated
4826
node/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -46,7 +46,7 @@
|
|||||||
"eslint-plugin-n": "^15.7.0",
|
"eslint-plugin-n": "^15.7.0",
|
||||||
"eslint-plugin-promise": "^6.1.1",
|
"eslint-plugin-promise": "^6.1.1",
|
||||||
"mocha": "^10.2.0",
|
"mocha": "^10.2.0",
|
||||||
"openai": "^3.2.1",
|
"openai": "^4.24.1",
|
||||||
"sinon": "^15.1.0",
|
"sinon": "^15.1.0",
|
||||||
"temp": "^0.9.4",
|
"temp": "^0.9.4",
|
||||||
"ts-node": "^10.9.1",
|
"ts-node": "^10.9.1",
|
||||||
|
|||||||
@@ -13,36 +13,44 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
import { type EmbeddingFunction } from '../index'
|
import { type EmbeddingFunction } from '../index'
|
||||||
|
import type OpenAI from 'openai'
|
||||||
|
|
||||||
export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
|
export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
|
||||||
private readonly _openai: any
|
private readonly _openai: OpenAI
|
||||||
private readonly _modelName: string
|
private readonly _modelName: string
|
||||||
|
|
||||||
constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
|
constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
|
||||||
let openai
|
/**
|
||||||
|
* @type {import("openai").default}
|
||||||
|
*/
|
||||||
|
let Openai
|
||||||
try {
|
try {
|
||||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||||
openai = require('openai')
|
Openai = require('openai')
|
||||||
} catch {
|
} catch {
|
||||||
throw new Error('please install openai using npm install openai')
|
throw new Error('please install openai@^4.24.1 using npm install openai')
|
||||||
}
|
}
|
||||||
|
|
||||||
this.sourceColumn = sourceColumn
|
this.sourceColumn = sourceColumn
|
||||||
const configuration = new openai.Configuration({
|
const configuration = {
|
||||||
apiKey: openAIKey
|
apiKey: openAIKey
|
||||||
})
|
}
|
||||||
this._openai = new openai.OpenAIApi(configuration)
|
|
||||||
|
this._openai = new Openai(configuration)
|
||||||
this._modelName = modelName
|
this._modelName = modelName
|
||||||
|
|
||||||
|
console.log({ op: this._openai.embeddings.create, md: this._modelName })
|
||||||
}
|
}
|
||||||
|
|
||||||
async embed (data: string[]): Promise<number[][]> {
|
async embed (data: string[]): Promise<number[][]> {
|
||||||
const response = await this._openai.createEmbedding({
|
const response = await this._openai.embeddings.create({
|
||||||
model: this._modelName,
|
model: this._modelName,
|
||||||
input: data
|
input: data
|
||||||
})
|
})
|
||||||
|
|
||||||
const embeddings: number[][] = []
|
const embeddings: number[][] = []
|
||||||
for (let i = 0; i < response.data.data.length; i++) {
|
for (let i = 0; i < response.data.length; i++) {
|
||||||
embeddings.push(response.data.data[i].embedding as number[])
|
embeddings.push(response.data[i].embedding)
|
||||||
}
|
}
|
||||||
return embeddings
|
return embeddings
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,33 +19,31 @@ import { OpenAIEmbeddingFunction } from '../../embedding/openai'
|
|||||||
import { isEmbeddingFunction } from '../../embedding/embedding_function'
|
import { isEmbeddingFunction } from '../../embedding/embedding_function'
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||||
const { OpenAIApi } = require('openai')
|
const OpenAIApi = require('openai')
|
||||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||||
const { stub } = require('sinon')
|
const { stub } = require('sinon')
|
||||||
|
|
||||||
describe('OpenAPIEmbeddings', function () {
|
describe('OpenAPIEmbeddings', function () {
|
||||||
const stubValue = {
|
const stubValue = {
|
||||||
data: {
|
data: [
|
||||||
data: [
|
{
|
||||||
{
|
embedding: Array(1536).fill(1.0)
|
||||||
embedding: Array(1536).fill(1.0)
|
},
|
||||||
},
|
{
|
||||||
{
|
embedding: Array(1536).fill(2.0)
|
||||||
embedding: Array(1536).fill(2.0)
|
}
|
||||||
}
|
]
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
describe('#embed', function () {
|
describe('#embed', function () {
|
||||||
it('should create vector embeddings', async function () {
|
it('should create vector embeddings', async function () {
|
||||||
const openAIStub = stub(OpenAIApi.prototype, 'createEmbedding').returns(stubValue)
|
const openAIStub = stub(OpenAIApi.Embeddings.prototype, 'create').returns(stubValue)
|
||||||
const f = new OpenAIEmbeddingFunction('text', 'sk-key')
|
const f = new OpenAIEmbeddingFunction('text', 'sk-key')
|
||||||
const vectors = await f.embed(['abc', 'def'])
|
const vectors = await f.embed(['abc', 'def'])
|
||||||
assert.isTrue(openAIStub.calledOnce)
|
assert.isTrue(openAIStub.calledOnce)
|
||||||
assert.equal(vectors.length, 2)
|
assert.equal(vectors.length, 2)
|
||||||
assert.deepEqual(vectors[0], stubValue.data.data[0].embedding)
|
assert.deepEqual(vectors[0], stubValue.data[0].embedding)
|
||||||
assert.deepEqual(vectors[1], stubValue.data.data[1].embedding)
|
assert.deepEqual(vectors[1], stubValue.data[1].embedding)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user