mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
update
This commit is contained in:
@@ -205,6 +205,7 @@ nav:
|
|||||||
- PromptTools: integrations/prompttools.md
|
- PromptTools: integrations/prompttools.md
|
||||||
- dlt: integrations/dlt.md
|
- dlt: integrations/dlt.md
|
||||||
- phidata: integrations/phidata.md
|
- phidata: integrations/phidata.md
|
||||||
|
- Genkit: integrations/genkit.md
|
||||||
- 🎯 Examples:
|
- 🎯 Examples:
|
||||||
- Overview: examples/index.md
|
- Overview: examples/index.md
|
||||||
- 🐍 Python:
|
- 🐍 Python:
|
||||||
@@ -331,6 +332,7 @@ nav:
|
|||||||
- PromptTools: integrations/prompttools.md
|
- PromptTools: integrations/prompttools.md
|
||||||
- dlt: integrations/dlt.md
|
- dlt: integrations/dlt.md
|
||||||
- phidata: integrations/phidata.md
|
- phidata: integrations/phidata.md
|
||||||
|
- Genkit: integrations/genkit.md
|
||||||
- Examples:
|
- Examples:
|
||||||
- examples/index.md
|
- examples/index.md
|
||||||
- 🐍 Python:
|
- 🐍 Python:
|
||||||
|
|||||||
181
docs/src/integrations/genkit.md
Normal file
181
docs/src/integrations/genkit.md
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
### genkitx-lancedb
|
||||||
|
This is a LanceDB plugin for the Genkit framework. It lets you use LanceDB to ingest and retrieve data from within Genkit.
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
```bash
|
||||||
|
pnpm install genkitx-lancedb
|
||||||
|
```
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
Adding LanceDB plugin to your genkit instance.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
import { lancedbIndexerRef, lancedb, lancedbRetrieverRef, WriteMode } from 'genkitx-lancedb';
|
||||||
|
import { textEmbedding004, vertexAI } from '@genkit-ai/vertexai';
|
||||||
|
import { gemini } from '@genkit-ai/vertexai';
|
||||||
|
import { z, genkit } from 'genkit';
|
||||||
|
import { Document } from 'genkit/retriever';
|
||||||
|
import { chunk } from 'llm-chunk';
|
||||||
|
import { readFile } from 'fs/promises';
|
||||||
|
import path from 'path';
|
||||||
|
import pdf from 'pdf-parse/lib/pdf-parse';
|
||||||
|
|
||||||
|
const ai = genkit({
|
||||||
|
plugins: [
|
||||||
|
// vertexAI provides the textEmbedding004 embedder
|
||||||
|
vertexAI(),
|
||||||
|
|
||||||
|
// the local vector store requires an embedder to translate from text to vector
|
||||||
|
lancedb([
|
||||||
|
{
|
||||||
|
dbUri: '.db', // optional lancedb uri, default to .db
|
||||||
|
tableName: 'table', // optional table name, default to table
|
||||||
|
embedder: textEmbedding004,
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
You can run this app with the following command:
|
||||||
|
```bash
|
||||||
|
genkit start -- tsx --watch src/index.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
This adds LanceDB as a retriever and an indexer to the Genkit instance. You can see both in the GUI view:
|
||||||
|
<img width="1710" alt="Screenshot 2025-05-11 at 7 21 05 PM" src="https://github.com/user-attachments/assets/e752f7f4-785b-4797-a11e-72ab06a531b7" />
|
||||||
|
|
||||||
|
**Testing retrieval on a sample table**
|
||||||
|
Let's see the raw retrieval results
|
||||||
|
|
||||||
|
<img width="1710" alt="Screenshot 2025-05-11 at 7 21 05 PM" src="https://github.com/user-attachments/assets/b8d356ed-8421-4790-8fc0-d6af563b9657" />
|
||||||
|
On running this query, you'll see 5 results fetched from the LanceDB table, where each result looks something like this:
|
||||||
|
<img width="1417" alt="Screenshot 2025-05-11 at 7 21 18 PM" src="https://github.com/user-attachments/assets/77429525-36e2-4da6-a694-e58c1cf9eb83" />
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Creating a custom RAG flow
|
||||||
|
|
||||||
|
Now that we've seen how you can use LanceDB in a Genkit pipeline, let's refine the flow and create a RAG. A RAG flow consists of an indexer and a retriever, with the retriever's outputs post-processed and fed into an LLM to produce the final response.
|
||||||
|
|
||||||
|
### Creating custom indexer flows
|
||||||
|
You can also create custom indexer flows, utilizing more options and features provided by LanceDB.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
export const menuPdfIndexer = lancedbIndexerRef({
|
||||||
|
// Using all defaults, for dbUri, tableName, and embedder, etc
|
||||||
|
});
|
||||||
|
|
||||||
|
const chunkingConfig = {
|
||||||
|
minLength: 1000,
|
||||||
|
maxLength: 2000,
|
||||||
|
splitter: 'sentence',
|
||||||
|
overlap: 100,
|
||||||
|
delimiters: '',
|
||||||
|
} as any;
|
||||||
|
|
||||||
|
|
||||||
|
async function extractTextFromPdf(filePath: string) {
|
||||||
|
const pdfFile = path.resolve(filePath);
|
||||||
|
const dataBuffer = await readFile(pdfFile);
|
||||||
|
const data = await pdf(dataBuffer);
|
||||||
|
return data.text;
|
||||||
|
}
|
||||||
|
|
||||||
|
export const indexMenu = ai.defineFlow(
|
||||||
|
{
|
||||||
|
name: 'indexMenu',
|
||||||
|
inputSchema: z.string().describe('PDF file path'),
|
||||||
|
outputSchema: z.void(),
|
||||||
|
},
|
||||||
|
async (filePath: string) => {
|
||||||
|
filePath = path.resolve(filePath);
|
||||||
|
|
||||||
|
// Read the pdf.
|
||||||
|
const pdfTxt = await ai.run('extract-text', () =>
|
||||||
|
extractTextFromPdf(filePath)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Divide the pdf text into segments.
|
||||||
|
const chunks = await ai.run('chunk-it', async () =>
|
||||||
|
chunk(pdfTxt, chunkingConfig)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Convert chunks of text into documents to store in the index.
|
||||||
|
const documents = chunks.map((text) => {
|
||||||
|
return Document.fromText(text, { filePath });
|
||||||
|
});
|
||||||
|
|
||||||
|
// Add documents to the index.
|
||||||
|
await ai.index({
|
||||||
|
indexer: menuPdfIndexer,
|
||||||
|
documents,
|
||||||
|
options: {
|
||||||
|
writeMode: WriteMode.Overwrite,
|
||||||
|
} as any
|
||||||
|
});
|
||||||
|
}
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
<img width="1316" alt="Screenshot 2025-05-11 at 8 35 56 PM" src="https://github.com/user-attachments/assets/e2a20ce4-d1d0-4fa2-9a84-f2cc26e3a29f" />
|
||||||
|
|
||||||
|
In your console, you can see the logs
|
||||||
|
|
||||||
|
<img width="511" alt="Screenshot 2025-05-11 at 7 19 14 PM" src="https://github.com/user-attachments/assets/243f26c5-ed38-40b6-b661-002f40f0423a" />
|
||||||
|
|
||||||
|
### Creating custom retriever flows
|
||||||
|
You can also create custom retriever flows, utilizing more options and features provided by LanceDB.
|
||||||
|
```ts
|
||||||
|
export const menuRetriever = lancedbRetrieverRef({
|
||||||
|
tableName: "table", // Use the same table name as the indexer.
|
||||||
|
displayName: "Menu", // Use a custom display name.
|
||||||
|
|
||||||
|
export const menuQAFlow = ai.defineFlow(
|
||||||
|
{ name: "Menu", inputSchema: z.string(), outputSchema: z.string() },
|
||||||
|
async (input: string) => {
|
||||||
|
// retrieve relevant documents
|
||||||
|
const docs = await ai.retrieve({
|
||||||
|
retriever: menuRetriever,
|
||||||
|
query: input,
|
||||||
|
options: {
|
||||||
|
k: 3,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const extractedContent = docs.map(doc => {
|
||||||
|
if (doc.content && Array.isArray(doc.content) && doc.content.length > 0) {
|
||||||
|
if (doc.content[0].media && doc.content[0].media.url) {
|
||||||
|
return doc.content[0].media.url;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "No content found";
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log("Extracted content:", extractedContent);
|
||||||
|
|
||||||
|
const { text } = await ai.generate({
|
||||||
|
model: gemini('gemini-2.0-flash'),
|
||||||
|
prompt: `
|
||||||
|
You are acting as a helpful AI assistant that can answer
|
||||||
|
questions about the food available on the menu at Genkit Grub Pub.
|
||||||
|
|
||||||
|
Use only the context provided to answer the question.
|
||||||
|
If you don't know, do not make up an answer.
|
||||||
|
Do not add or change items on the menu.
|
||||||
|
|
||||||
|
Context:
|
||||||
|
${extractedContent.join('\n\n')}
|
||||||
|
|
||||||
|
Question: ${input}`,
|
||||||
|
docs,
|
||||||
|
});
|
||||||
|
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
```
|
||||||
|
Now, using our retrieval flow, we can ask questions about the ingested PDF:
|
||||||
|
<img width="1306" alt="Screenshot 2025-05-11 at 7 18 45 PM" src="https://github.com/user-attachments/assets/86c66b13-7c12-4d5f-9d81-ae36bfb1c346" />
|
||||||
|
|
||||||
Reference in New Issue
Block a user