Compare commits

...

4 Commits

Author SHA1 Message Date
lancedb automation
3bed9b6db8 chore: update lance dependency to v5.1.0-beta.1 2026-04-10 17:13:51 +00:00
Will Jones
2807ad6854 chore: bump Rust toolchain from 1.91.0 to 1.94.0 (#3257)
Bumps the Rust toolchain to 1.94.0 (latest installed) to unblock CI
failures caused by the AWS SDK's MSRV requirement. No lint fixes were
needed.

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-10 07:57:47 -07:00
Dhruv Garg
4761fa9bcb fix(python): migrate gemini-text provider to google-genai sdk (#3250)
## Summary
- migrate gemini-text embedding provider from deprecated
google.generativeai to google.genai
- update Python embedding extra dependency to google-genai
- update default model name to gemini-embedding-001
- adapt embed calls to Client().models.embed_content(...)
- apply lint fixes from CI

## Related
- Closes #3191
2026-04-09 15:28:34 -07:00
lennylxx
4c2939d66e fix(python): guard against None before .decode() on split_names metadata key (#3229)
`.get(b"split_names", None).decode()` was called unconditionally in both
Permutations.__init__ and Permutation.from_tables(), crashing with
AttributeError when schema metadata existed but lacked the split_names
key. Guard the decode behind a None check and add regression tests.
2026-04-08 16:04:13 -07:00
12 changed files with 149 additions and 74 deletions

View File

@@ -8,6 +8,7 @@ on:
paths:
- Cargo.toml
- Cargo.lock
- rust-toolchain.toml
- nodejs/**
- rust/**
- docs/src/js/**

View File

@@ -8,6 +8,7 @@ on:
paths:
- Cargo.toml
- Cargo.lock
- rust-toolchain.toml
- python/**
- rust/**
- .github/workflows/python.yml

View File

@@ -8,6 +8,7 @@ on:
paths:
- Cargo.toml
- Cargo.lock
- rust-toolchain.toml
- rust/**
- .github/workflows/rust.yml

65
Cargo.lock generated
View File

@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4134,8 +4134,8 @@ dependencies = [
[[package]]
name = "lance"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow",
"arrow-arith",
@@ -4201,13 +4201,14 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-data",
"arrow-ipc",
"arrow-ord",
"arrow-schema",
"arrow-select",
@@ -4222,8 +4223,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrayref",
"paste",
@@ -4232,8 +4233,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4270,8 +4271,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow",
"arrow-array",
@@ -4301,8 +4302,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow",
"arrow-array",
@@ -4320,8 +4321,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4358,8 +4359,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4391,8 +4392,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow",
"arrow-arith",
@@ -4456,8 +4457,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow",
"arrow-arith",
@@ -4501,8 +4502,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4518,8 +4519,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow",
"async-trait",
@@ -4532,8 +4533,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4578,8 +4579,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow",
"arrow-array",
@@ -4618,8 +4619,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.1.0-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.1#103e947aef451e4b88da03fe47512558d333c29c"
dependencies = [
"arrow-array",
"arrow-schema",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=5.1.0-beta.1", default-features = false, "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=5.1.0-beta.1", default-features = false, "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=5.1.0-beta.1", default-features = false, "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=5.1.0-beta.1", "tag" = "v5.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }

View File

@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>5.0.0-beta.5</lance-core.version>
<lance-core.version>5.1.0-beta.1</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -83,7 +83,7 @@ embeddings = [
"colpali-engine>=0.3.10",
"huggingface_hub>=0.19.0",
"InstructorEmbedding>=1.0.1",
"google.generativeai>=0.3.0",
"google-genai>=1.0.0",
"boto3>=1.28.57",
"awscli>=1.44.38",
"botocore>=1.31.57",

View File

@@ -19,10 +19,10 @@ from .utils import TEXT, api_key_not_found_help
@register("gemini-text")
class GeminiText(TextEmbeddingFunction):
"""
An embedding function that uses the Google's Gemini API. Requires GOOGLE_API_KEY to
An embedding function that uses Google's Gemini API. Requires GOOGLE_API_KEY to
be set.
https://ai.google.dev/docs/embeddings_guide
https://ai.google.dev/gemini-api/docs/embeddings
Supports various task types:
| Task Type | Description |
@@ -46,9 +46,12 @@ class GeminiText(TextEmbeddingFunction):
Parameters
----------
name: str, default "models/embedding-001"
The name of the model to use. See the Gemini documentation for a list of
available models.
name: str, default "gemini-embedding-001"
The name of the model to use. Supported models include:
- "gemini-embedding-001" (768 dimensions)
Note: The legacy "models/embedding-001" format is also supported but
"gemini-embedding-001" is recommended.
query_task_type: str, default "retrieval_query"
Sets the task type for the queries.
@@ -77,7 +80,7 @@ class GeminiText(TextEmbeddingFunction):
"""
name: str = "models/embedding-001"
name: str = "gemini-embedding-001"
query_task_type: str = "retrieval_query"
source_task_type: str = "retrieval_document"
@@ -114,23 +117,48 @@ class GeminiText(TextEmbeddingFunction):
texts: list[str] or np.ndarray (of str)
The texts to embed
"""
if (
kwargs.get("task_type") == "retrieval_document"
): # Provide a title to use existing API design
title = "Embedding of a document"
kwargs["title"] = title
from google.genai import types
return [
self.client.embed_content(model=self.name, content=text, **kwargs)[
"embedding"
]
for text in texts
]
task_type = kwargs.get("task_type")
# Build content objects for embed_content
contents = []
for text in texts:
if task_type == "retrieval_document":
# Provide a title for retrieval_document task
contents.append(
{"parts": [{"text": "Embedding of a document"}, {"text": text}]}
)
else:
contents.append({"parts": [{"text": text}]})
# Build config
config_kwargs = {}
if task_type:
config_kwargs["task_type"] = task_type.upper() # API expects uppercase
# Call embed_content for each content
embeddings = []
for content in contents:
config = (
types.EmbedContentConfig(**config_kwargs) if config_kwargs else None
)
response = self.client.models.embed_content(
model=self.name,
contents=content,
config=config,
)
embeddings.append(response.embeddings[0].values)
return embeddings
@cached_property
def client(self):
genai = attempt_import_or_raise("google.generativeai", "google.generativeai")
attempt_import_or_raise("google.genai", "google-genai")
if not os.environ.get("GOOGLE_API_KEY"):
api_key_not_found_help("google")
return genai
from google import genai as genai_module
return genai_module.Client(api_key=os.environ.get("GOOGLE_API_KEY"))

View File

@@ -284,9 +284,8 @@ class Permutations:
self.permutation_table = permutation_table
if permutation_table.schema.metadata is not None:
split_names = permutation_table.schema.metadata.get(
b"split_names", None
).decode("utf-8")
raw = permutation_table.schema.metadata.get(b"split_names")
split_names = raw.decode("utf-8") if raw is not None else None
if split_names is not None:
self.split_names = json.loads(split_names)
self.split_dict = {
@@ -460,9 +459,8 @@ class Permutation:
f"Cannot create a permutation on split `{split}`"
" because no split names are defined in the permutation table"
)
split_names = permutation_table.schema.metadata.get(
b"split_names", None
).decode("utf-8")
raw = permutation_table.schema.metadata.get(b"split_names")
split_names = raw.decode("utf-8") if raw is not None else None
if split_names is None:
raise ValueError(
f"Cannot create a permutation on split `{split}`"

View File

@@ -522,6 +522,50 @@ def test_no_split_names(some_table: Table):
assert permutations[1].num_rows == 500
def test_permutations_metadata_without_split_names_key(mem_db: DBConnection):
    """Permutations must tolerate schema metadata that lacks ``split_names``.

    Regression test: the constructor used to call ``.decode()`` directly on
    the result of ``metadata.get(b"split_names", None)``, so a permutation
    table whose metadata held other keys but no split names raised
    ``AttributeError: 'NoneType' has no attribute 'decode'``.
    """
    base = mem_db.create_table("base_nosplit", pa.table({"x": range(10)}))

    # A permutation-shaped table whose schema metadata carries an unrelated
    # key only -- deliberately no b"split_names" entry.
    columns = {
        "row_id": pa.array(range(10), type=pa.uint64()),
        "split_id": pa.array([0] * 10, type=pa.uint32()),
    }
    unrelated_metadata = {b"other_key": b"other_value"}
    perm_data = pa.table(columns).replace_schema_metadata(unrelated_metadata)
    perm_tbl = mem_db.create_table("perm_nosplit", perm_data)

    result = Permutations(base, perm_tbl)

    # With no split names recorded, both lookups are expected to be empty.
    assert result.split_names == []
    assert result.split_dict == {}
def test_from_tables_string_split_missing_names_key(mem_db: DBConnection):
    """from_tables() with a string split raises ValueError without split names.

    Regression test: ``.get(b"split_names", None).decode()`` used to fail with
    ``AttributeError`` when the metadata dict existed but had no
    ``split_names`` key; the expected failure mode is a ``ValueError``.
    """
    base = mem_db.create_table("base_strsplit", pa.table({"x": range(10)}))

    # Permutation-shaped table with metadata present but no b"split_names".
    row_ids = pa.array(range(10), type=pa.uint64())
    split_ids = pa.array([0] * 10, type=pa.uint32())
    perm_data = pa.table({"row_id": row_ids, "split_id": split_ids})
    perm_data = perm_data.replace_schema_metadata({b"other_key": b"other_value"})
    perm_tbl = mem_db.create_table("perm_strsplit", perm_data)

    expected_message = "no split names are defined"
    with pytest.raises(ValueError, match=expected_message):
        Permutation.from_tables(base, perm_tbl, split="train")
@pytest.fixture
def some_perm_table(some_table: Table) -> Table:
return (

View File

@@ -1,2 +1,2 @@
[toolchain]
channel = "1.91.0"
channel = "1.94.0"

View File

@@ -177,6 +177,7 @@ impl BedrockEmbeddingFunction {
))
.send()
.await
.map_err(Box::new)
})
})
.unwrap();