fix(python): migrate gemini-text provider to google-genai sdk (#3250)

## Summary - migrate gemini-text embedding provider from deprecated google.generativeai to google.genai - update Python embedding extra dependency to google-genai - update default model name to gemini-embedding-001 - adapt embed calls to Client().models.embed_content(...) - apply lint fixes from CI ## Related - Closes #3191
2026-07-03 19:10:41 +00:00 · 2026-04-10 03:58:34 +05:30
parent 4c2939d66e
commit 4761fa9bcb
2 changed files with 48 additions and 20 deletions
--- a/python/python/lancedb/embeddings/gemini_text.py
+++ b/python/python/lancedb/embeddings/gemini_text.py
@@ -19,10 +19,10 @@ from .utils import TEXT, api_key_not_found_help
@register("gemini-text")
 class GeminiText(TextEmbeddingFunction):
    """
-    An embedding function that uses the Google's Gemini API. Requires GOOGLE_API_KEY to
+    An embedding function that uses Google's Gemini API. Requires GOOGLE_API_KEY to
    be set.

-    https://ai.google.dev/docs/embeddings_guide
+    https://ai.google.dev/gemini-api/docs/embeddings

    Supports various tasks types:
    | Task Type               | Description                                            |
@@ -46,9 +46,12 @@ class GeminiText(TextEmbeddingFunction):

    Parameters
    ----------
-    name: str, default "models/embedding-001"
-        The name of the model to use. See the Gemini documentation for a list of
-        available models.
+    name: str, default "gemini-embedding-001"
+        The name of the model to use. Supported models include:
+        - "gemini-embedding-001" (768 dimensions)
+
+        Note: The legacy "models/embedding-001" format is also supported but
+        "gemini-embedding-001" is recommended.

    query_task_type: str, default "retrieval_query"
        Sets the task type for the queries.
@@ -77,7 +80,7 @@ class GeminiText(TextEmbeddingFunction):

    """

-    name: str = "models/embedding-001"
+    name: str = "gemini-embedding-001"
    query_task_type: str = "retrieval_query"
    source_task_type: str = "retrieval_document"

@@ -114,23 +117,48 @@ class GeminiText(TextEmbeddingFunction):
        texts: list[str] or np.ndarray (of str)
            The texts to embed
        """
-        if (
-            kwargs.get("task_type") == "retrieval_document"
-        ):  # Provide a title to use existing API design
-            title = "Embedding of a document"
-            kwargs["title"] = title
+        from google.genai import types

-        return [
-            self.client.embed_content(model=self.name, content=text, **kwargs)[
-                "embedding"
-            ]
-            for text in texts
-        ]
+        task_type = kwargs.get("task_type")
+
+        # Build content objects for embed_content
+        contents = []
+        for text in texts:
+            if task_type == "retrieval_document":
+                # Provide a title for retrieval_document task
+                contents.append(
+                    {"parts": [{"text": "Embedding of a document"}, {"text": text}]}
+                )
+            else:
+                contents.append({"parts": [{"text": text}]})
+
+        # Build config
+        config_kwargs = {}
+        if task_type:
+            config_kwargs["task_type"] = task_type.upper()  # API expects uppercase
+
+        # Call embed_content for each content
+        embeddings = []
+        for content in contents:
+            config = (
+                types.EmbedContentConfig(**config_kwargs) if config_kwargs else None
+            )
+            response = self.client.models.embed_content(
+                model=self.name,
+                contents=content,
+                config=config,
+            )
+            embeddings.append(response.embeddings[0].values)
+
+        return embeddings

    @cached_property
    def client(self):
-        genai = attempt_import_or_raise("google.generativeai", "google.generativeai")
+        attempt_import_or_raise("google.genai", "google-genai")

        if not os.environ.get("GOOGLE_API_KEY"):
            api_key_not_found_help("google")
-        return genai
+
+        from google import genai as genai_module
+
+        return genai_module.Client(api_key=os.environ.get("GOOGLE_API_KEY"))