Merge branch 'tecmie/embeddings-openai' of github.com:tecmie/lancedb into tecmie-tecmie/embeddings-openai

This commit is contained in:
Chang She
2024-01-19 16:43:53 -08:00
committed by Weston Pace
4 changed files with 31 additions and 4851 deletions

4826
node/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -46,7 +46,7 @@
     "eslint-plugin-n": "^15.7.0",
     "eslint-plugin-promise": "^6.1.1",
     "mocha": "^10.2.0",
-    "openai": "^3.2.1",
+    "openai": "^4.24.1",
     "sinon": "^15.1.0",
     "temp": "^0.9.4",
     "ts-node": "^10.9.1",

View File

@@ -13,36 +13,44 @@
 // limitations under the License.
 import { type EmbeddingFunction } from '../index'
+import type OpenAI from 'openai'
 export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
-  private readonly _openai: any
+  private readonly _openai: OpenAI
   private readonly _modelName: string
   constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
-    let openai
+    /**
+     * @type {import("openai").default}
+     */
+    let Openai
     try {
       // eslint-disable-next-line @typescript-eslint/no-var-requires
-      openai = require('openai')
+      Openai = require('openai')
     } catch {
-      throw new Error('please install openai using npm install openai')
+      throw new Error('please install openai@^4.24.1 using npm install openai')
     }
     this.sourceColumn = sourceColumn
-    const configuration = new openai.Configuration({
+    const configuration = {
       apiKey: openAIKey
-    })
-    this._openai = new openai.OpenAIApi(configuration)
+    }
+
+    this._openai = new Openai(configuration)
     this._modelName = modelName
+    console.log({ op: this._openai.embeddings.create, md: this._modelName })
   }
   async embed (data: string[]): Promise<number[][]> {
-    const response = await this._openai.createEmbedding({
+    const response = await this._openai.embeddings.create({
       model: this._modelName,
       input: data
     })
     const embeddings: number[][] = []
-    for (let i = 0; i < response.data.data.length; i++) {
-      embeddings.push(response.data.data[i].embedding as number[])
+    for (let i = 0; i < response.data.length; i++) {
+      embeddings.push(response.data[i].embedding)
     }
     return embeddings
   }

View File

@@ -19,33 +19,31 @@ import { OpenAIEmbeddingFunction } from '../../embedding/openai'
 import { isEmbeddingFunction } from '../../embedding/embedding_function'
 // eslint-disable-next-line @typescript-eslint/no-var-requires
-const { OpenAIApi } = require('openai')
+const OpenAIApi = require('openai')
 // eslint-disable-next-line @typescript-eslint/no-var-requires
 const { stub } = require('sinon')
 describe('OpenAPIEmbeddings', function () {
   const stubValue = {
-    data: {
-      data: [
-        {
-          embedding: Array(1536).fill(1.0)
-        },
-        {
-          embedding: Array(1536).fill(2.0)
-        }
-      ]
-    }
+    data: [
+      {
+        embedding: Array(1536).fill(1.0)
+      },
+      {
+        embedding: Array(1536).fill(2.0)
+      }
+    ]
   }
   describe('#embed', function () {
     it('should create vector embeddings', async function () {
-      const openAIStub = stub(OpenAIApi.prototype, 'createEmbedding').returns(stubValue)
+      const openAIStub = stub(OpenAIApi.Embeddings.prototype, 'create').returns(stubValue)
       const f = new OpenAIEmbeddingFunction('text', 'sk-key')
       const vectors = await f.embed(['abc', 'def'])
       assert.isTrue(openAIStub.calledOnce)
       assert.equal(vectors.length, 2)
-      assert.deepEqual(vectors[0], stubValue.data.data[0].embedding)
-      assert.deepEqual(vectors[1], stubValue.data.data[1].embedding)
+      assert.deepEqual(vectors[0], stubValue.data[0].embedding)
+      assert.deepEqual(vectors[1], stubValue.data[1].embedding)
     })
   })