mirror of
https://github.com/lancedb/lancedb.git
synced 2026-04-11 10:20:39 +00:00
Compare commits
4 Commits
codex/upda
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
11bc674548 | ||
|
|
5593460823 | ||
|
|
2807ad6854 | ||
|
|
4761fa9bcb |
1
.github/workflows/nodejs.yml
vendored
1
.github/workflows/nodejs.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- nodejs/**
|
||||
- rust/**
|
||||
- docs/src/js/**
|
||||
|
||||
1
.github/workflows/python.yml
vendored
1
.github/workflows/python.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- python/**
|
||||
- rust/**
|
||||
- .github/workflows/python.yml
|
||||
|
||||
1
.github/workflows/rust.yml
vendored
1
.github/workflows/rust.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- rust/**
|
||||
- .github/workflows/rust.yml
|
||||
|
||||
|
||||
67
Cargo.lock
generated
67
Cargo.lock
generated
@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "fsst"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"rand 0.9.2",
|
||||
@@ -4134,13 +4134,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-ipc",
|
||||
"arrow-ord",
|
||||
"arrow-row",
|
||||
@@ -4201,13 +4202,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-arrow"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-ipc",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
@@ -4222,8 +4224,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-bitpacking"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"paste",
|
||||
@@ -4232,8 +4234,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-core"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4270,12 +4272,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datafusion"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
@@ -4301,8 +4304,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-datagen"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4320,8 +4323,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-encoding"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4358,8 +4361,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-file"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
@@ -4391,8 +4394,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-index"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4456,8 +4459,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-io"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
@@ -4501,8 +4504,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-linalg"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
@@ -4518,8 +4521,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
@@ -4532,8 +4535,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-namespace-impls"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-ipc",
|
||||
@@ -4578,8 +4581,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-table"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -4618,8 +4621,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lance-testing"
|
||||
version = "5.0.0-beta.5"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
|
||||
version = "5.1.0-beta.2"
|
||||
source = "git+https://github.com/lance-format/lance.git?tag=v5.1.0-beta.2#df61d95cac9ab579e4bc4ff41d1bd749b24af7f0"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
|
||||
28
Cargo.toml
28
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=5.1.0-beta.2", default-features = false, "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=5.1.0-beta.2", default-features = false, "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=5.1.0-beta.2", default-features = false, "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=5.1.0-beta.2", "tag" = "v5.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>5.0.0-beta.5</lance-core.version>
|
||||
<lance-core.version>5.1.0-beta.2</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.31.0-beta.1"
|
||||
current_version = "0.31.0-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.31.0-beta.1"
|
||||
version = "0.31.0-beta.2"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -83,7 +83,7 @@ embeddings = [
|
||||
"colpali-engine>=0.3.10",
|
||||
"huggingface_hub>=0.19.0",
|
||||
"InstructorEmbedding>=1.0.1",
|
||||
"google.generativeai>=0.3.0",
|
||||
"google-genai>=1.0.0",
|
||||
"boto3>=1.28.57",
|
||||
"awscli>=1.44.38",
|
||||
"botocore>=1.31.57",
|
||||
|
||||
@@ -19,10 +19,10 @@ from .utils import TEXT, api_key_not_found_help
|
||||
@register("gemini-text")
|
||||
class GeminiText(TextEmbeddingFunction):
|
||||
"""
|
||||
An embedding function that uses the Google's Gemini API. Requires GOOGLE_API_KEY to
|
||||
An embedding function that uses Google's Gemini API. Requires GOOGLE_API_KEY to
|
||||
be set.
|
||||
|
||||
https://ai.google.dev/docs/embeddings_guide
|
||||
https://ai.google.dev/gemini-api/docs/embeddings
|
||||
|
||||
Supports various tasks types:
|
||||
| Task Type | Description |
|
||||
@@ -46,9 +46,12 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str, default "models/embedding-001"
|
||||
The name of the model to use. See the Gemini documentation for a list of
|
||||
available models.
|
||||
name: str, default "gemini-embedding-001"
|
||||
The name of the model to use. Supported models include:
|
||||
- "gemini-embedding-001" (768 dimensions)
|
||||
|
||||
Note: The legacy "models/embedding-001" format is also supported but
|
||||
"gemini-embedding-001" is recommended.
|
||||
|
||||
query_task_type: str, default "retrieval_query"
|
||||
Sets the task type for the queries.
|
||||
@@ -77,7 +80,7 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
"""
|
||||
|
||||
name: str = "models/embedding-001"
|
||||
name: str = "gemini-embedding-001"
|
||||
query_task_type: str = "retrieval_query"
|
||||
source_task_type: str = "retrieval_document"
|
||||
|
||||
@@ -114,23 +117,48 @@ class GeminiText(TextEmbeddingFunction):
|
||||
texts: list[str] or np.ndarray (of str)
|
||||
The texts to embed
|
||||
"""
|
||||
if (
|
||||
kwargs.get("task_type") == "retrieval_document"
|
||||
): # Provide a title to use existing API design
|
||||
title = "Embedding of a document"
|
||||
kwargs["title"] = title
|
||||
from google.genai import types
|
||||
|
||||
return [
|
||||
self.client.embed_content(model=self.name, content=text, **kwargs)[
|
||||
"embedding"
|
||||
]
|
||||
for text in texts
|
||||
]
|
||||
task_type = kwargs.get("task_type")
|
||||
|
||||
# Build content objects for embed_content
|
||||
contents = []
|
||||
for text in texts:
|
||||
if task_type == "retrieval_document":
|
||||
# Provide a title for retrieval_document task
|
||||
contents.append(
|
||||
{"parts": [{"text": "Embedding of a document"}, {"text": text}]}
|
||||
)
|
||||
else:
|
||||
contents.append({"parts": [{"text": text}]})
|
||||
|
||||
# Build config
|
||||
config_kwargs = {}
|
||||
if task_type:
|
||||
config_kwargs["task_type"] = task_type.upper() # API expects uppercase
|
||||
|
||||
# Call embed_content for each content
|
||||
embeddings = []
|
||||
for content in contents:
|
||||
config = (
|
||||
types.EmbedContentConfig(**config_kwargs) if config_kwargs else None
|
||||
)
|
||||
response = self.client.models.embed_content(
|
||||
model=self.name,
|
||||
contents=content,
|
||||
config=config,
|
||||
)
|
||||
embeddings.append(response.embeddings[0].values)
|
||||
|
||||
return embeddings
|
||||
|
||||
@cached_property
|
||||
def client(self):
|
||||
genai = attempt_import_or_raise("google.generativeai", "google.generativeai")
|
||||
attempt_import_or_raise("google.genai", "google-genai")
|
||||
|
||||
if not os.environ.get("GOOGLE_API_KEY"):
|
||||
api_key_not_found_help("google")
|
||||
return genai
|
||||
|
||||
from google import genai as genai_module
|
||||
|
||||
return genai_module.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "1.91.0"
|
||||
channel = "1.94.0"
|
||||
|
||||
@@ -177,6 +177,7 @@ impl BedrockEmbeddingFunction {
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.map_err(Box::new)
|
||||
})
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
Reference in New Issue
Block a user