diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 560452ec..f0596d80 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -205,6 +205,7 @@ nav: - PromptTools: integrations/prompttools.md - dlt: integrations/dlt.md - phidata: integrations/phidata.md + - Genkit: integrations/genkit.md - 🎯 Examples: - Overview: examples/index.md - 🐍 Python: @@ -331,6 +332,7 @@ nav: - PromptTools: integrations/prompttools.md - dlt: integrations/dlt.md - phidata: integrations/phidata.md + - Genkit: integrations/genkit.md - Examples: - examples/index.md - 🐍 Python: diff --git a/docs/src/integrations/genkit.md b/docs/src/integrations/genkit.md new file mode 100644 index 00000000..9be55aa7 --- /dev/null +++ b/docs/src/integrations/genkit.md @@ -0,0 +1,181 @@ +### genkitx-lancedb +This is a LanceDB plugin for the genkit framework. It allows you to use LanceDB for ingesting and retrieving data using the genkit framework. + +### Installation +```bash +pnpm install genkitx-lancedb +``` + +### Usage + +Adding the LanceDB plugin to your genkit instance. 
+ +```ts +import { lancedbIndexerRef, lancedb, lancedbRetrieverRef, WriteMode } from 'genkitx-lancedb'; +import { textEmbedding004, vertexAI } from '@genkit-ai/vertexai'; +import { gemini } from '@genkit-ai/vertexai'; +import { z, genkit } from 'genkit'; +import { Document } from 'genkit/retriever'; +import { chunk } from 'llm-chunk'; +import { readFile } from 'fs/promises'; +import path from 'path'; +import pdf from 'pdf-parse/lib/pdf-parse'; + +const ai = genkit({ + plugins: [ + // vertexAI provides the textEmbedding004 embedder + vertexAI(), + + // the local vector store requires an embedder to translate from text to vector + lancedb([ + { + dbUri: '.db', // optional lancedb uri, default to .db + tableName: 'table', // optional table name, default to table + embedder: textEmbedding004, + }, + ]), + ], +}); +``` + +You can run this app with the following command: +```bash +genkit start -- tsx --watch src/index.ts +``` + +This'll add LanceDB as a retriever and indexer to the genkit instance. You can see it in the GUI view +Screenshot 2025-05-11 at 7 21 05 PM + +**Testing retrieval on a sample table** +Let's see the raw retrieval results + +Screenshot 2025-05-11 at 7 21 05 PM +On running this query, you'll see 5 results fetched from the lancedb table, where each result looks something like this: +Screenshot 2025-05-11 at 7 21 18 PM + + + +## Creating a custom RAG flow + +Now that we've seen how you can use LanceDB in a genkit pipeline, let's refine the flow and create a RAG. A RAG flow will consist of an index and a retriever, with its outputs postprocessed and fed into an LLM for the final response. + +### Creating custom indexer flows +You can also create custom indexer flows, utilizing more options and features provided by LanceDB. 
+ +```ts +export const menuPdfIndexer = lancedbIndexerRef({ + // Using all defaults, for dbUri, tableName, and embedder, etc +}); + +const chunkingConfig = { + minLength: 1000, + maxLength: 2000, + splitter: 'sentence', + overlap: 100, + delimiters: '', +} as any; + + +async function extractTextFromPdf(filePath: string) { + const pdfFile = path.resolve(filePath); + const dataBuffer = await readFile(pdfFile); + const data = await pdf(dataBuffer); + return data.text; +} + +export const indexMenu = ai.defineFlow( + { + name: 'indexMenu', + inputSchema: z.string().describe('PDF file path'), + outputSchema: z.void(), + }, + async (filePath: string) => { + filePath = path.resolve(filePath); + + // Read the pdf. + const pdfTxt = await ai.run('extract-text', () => + extractTextFromPdf(filePath) + ); + + // Divide the pdf text into segments. + const chunks = await ai.run('chunk-it', async () => + chunk(pdfTxt, chunkingConfig) + ); + + // Convert chunks of text into documents to store in the index. + const documents = chunks.map((text) => { + return Document.fromText(text, { filePath }); + }); + + // Add documents to the index. + await ai.index({ + indexer: menuPdfIndexer, + documents, + options: { + writeMode: WriteMode.Overwrite, + } as any + }); + } +); +``` + +Screenshot 2025-05-11 at 8 35 56 PM + +In your console, you can see the logs + +Screenshot 2025-05-11 at 7 19 14 PM + +### Creating custom retriever flows +You can also create custom retriever flows, utilizing more options and features provided by LanceDB. +```ts +export const menuRetriever = lancedbRetrieverRef({ + tableName: "table", // Use the same table name as the indexer. + displayName: "Menu", // Use a custom display name. 
+ +export const menuQAFlow = ai.defineFlow( + { name: "Menu", inputSchema: z.string(), outputSchema: z.string() }, + async (input: string) => { + // retrieve relevant documents + const docs = await ai.retrieve({ + retriever: menuRetriever, + query: input, + options: { + k: 3, + }, + }); + + const extractedContent = docs.map(doc => { + if (doc.content && Array.isArray(doc.content) && doc.content.length > 0) { + if (doc.content[0].media && doc.content[0].media.url) { + return doc.content[0].media.url; + } + } + return "No content found"; + }); + + console.log("Extracted content:", extractedContent); + + const { text } = await ai.generate({ + model: gemini('gemini-2.0-flash'), + prompt: ` +You are acting as a helpful AI assistant that can answer +questions about the food available on the menu at Genkit Grub Pub. + +Use only the context provided to answer the question. +If you don't know, do not make up an answer. +Do not add or change items on the menu. + +Context: +${extractedContent.join('\n\n')} + +Question: ${input}`, + docs, + }); + + return text; + } +); +``` +Now, using our retrieval flow, we can ask questions about the ingested PDF +Screenshot 2025-05-11 at 7 18 45 PM +