mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-08 12:52:58 +00:00
Compare commits
4 Commits
codex/upda
...
ayush/gemi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5904aec34b | ||
|
|
1fa888615f | ||
|
|
40967f3baa | ||
|
|
0bfc7de32c |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.22.3-beta.1"
|
||||
current_version = "0.22.3-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -4704,7 +4704,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb"
|
||||
version = "0.22.3-beta.1"
|
||||
version = "0.22.3-beta.2"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@@ -4801,7 +4801,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-nodejs"
|
||||
version = "0.22.3-beta.1"
|
||||
version = "0.22.3-beta.2"
|
||||
dependencies = [
|
||||
"arrow-array",
|
||||
"arrow-ipc",
|
||||
@@ -4821,7 +4821,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lancedb-python"
|
||||
version = "0.25.3-beta.1"
|
||||
version = "0.25.3-beta.2"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
|
||||
@@ -16,7 +16,6 @@ rust-version = "1.78.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.38.3-beta.9", default-features = false, "tag" = "v0.38.3-beta.9", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-arrow = { "version" = "=0.38.3-beta.9", "tag" = "v0.38.3-beta.9", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-core = { "version" = "=0.38.3-beta.9", "tag" = "v0.38.3-beta.9", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-datagen = { "version" = "=0.38.3-beta.9", "tag" = "v0.38.3-beta.9", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-file = { "version" = "=0.38.3-beta.9", "tag" = "v0.38.3-beta.9", "git" = "https://github.com/lancedb/lance.git" }
|
||||
@@ -29,6 +28,7 @@ lance-table = { "version" = "=0.38.3-beta.9", "tag" = "v0.38.3-beta.9", "git" =
|
||||
lance-testing = { "version" = "=0.38.3-beta.9", "tag" = "v0.38.3-beta.9", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-datafusion = { "version" = "=0.38.3-beta.9", "tag" = "v0.38.3-beta.9", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-encoding = { "version" = "=0.38.3-beta.9", "tag" = "v0.38.3-beta.9", "git" = "https://github.com/lancedb/lance.git" }
|
||||
lance-arrow = { "version" = "=0.38.3-beta.9", "tag" = "v0.38.3-beta.9", "git" = "https://github.com/lancedb/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "56.2", optional = false }
|
||||
@@ -65,4 +65,4 @@ semver = "1.0.25"
|
||||
crunchy = "0.2.4"
|
||||
chrono = "0.4"
|
||||
# Workaround for: https://github.com/Lokathor/bytemuck/issues/306
|
||||
bytemuck_derive = ">=1.8.1, <1.9.0"
|
||||
bytemuck_derive = ">=1.8.1, <1.9.0"
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.3-beta.1</version>
|
||||
<version>0.22.3-beta.2</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.3-beta.1</version>
|
||||
<version>0.22.3-beta.2</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.3-beta.1</version>
|
||||
<version>0.22.3-beta.2</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.22.3-beta.1"
|
||||
version = "0.22.3-beta.2"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.22.3-beta.1",
|
||||
"version": "0.22.3-beta.2",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.25.3-beta.1"
|
||||
current_version = "0.25.3-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.25.3-beta.1"
|
||||
version = "0.25.3-beta.2"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
import os
|
||||
from functools import cached_property
|
||||
from typing import List, Union
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
@@ -46,10 +46,11 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str, default "models/embedding-001"
|
||||
name: str, default "models/text-embedding-004"
|
||||
The name of the model to use. See the Gemini documentation for a list of
|
||||
available models.
|
||||
|
||||
dims: int, optional
|
||||
The dimension of the embedding, otherwise it will be inferred.
|
||||
query_task_type: str, default "retrieval_query"
|
||||
Sets the task type for the queries.
|
||||
source_task_type: str, default "retrieval_document"
|
||||
@@ -77,9 +78,10 @@ class GeminiText(TextEmbeddingFunction):
|
||||
|
||||
"""
|
||||
|
||||
name: str = "models/embedding-001"
|
||||
name: str = "models/text-embedding-004"
|
||||
query_task_type: str = "retrieval_query"
|
||||
source_task_type: str = "retrieval_document"
|
||||
dims: Optional[int] = None
|
||||
|
||||
if PYDANTIC_VERSION.major < 2: # Pydantic 1.x compat
|
||||
|
||||
@@ -89,9 +91,18 @@ class GeminiText(TextEmbeddingFunction):
|
||||
model_config = dict()
|
||||
model_config["ignored_types"] = (cached_property,)
|
||||
|
||||
def ndims(self):
|
||||
# TODO: fix hardcoding
|
||||
return 768
|
||||
@cached_property
|
||||
def _model(self):
|
||||
return self.client.get_model(self.name)
|
||||
|
||||
def ndims(self) -> int:
|
||||
if self.dims:
|
||||
return self.dims
|
||||
if hasattr(self._model, "output_dimensionality"):
|
||||
return self._model.output_dimensionality
|
||||
# Fallback for older versions of the library
|
||||
# or models that don't have the attribute
|
||||
return len(self.generate_embeddings(["lancedb"])[0])
|
||||
|
||||
def compute_query_embeddings(self, query: str, *args, **kwargs) -> List[np.array]:
|
||||
return self.compute_source_embeddings(query, task_type=self.query_task_type)
|
||||
@@ -119,6 +130,8 @@ class GeminiText(TextEmbeddingFunction):
|
||||
): # Provide a title to use existing API design
|
||||
title = "Embedding of a document"
|
||||
kwargs["title"] = title
|
||||
if self.dims:
|
||||
kwargs["output_dimensionality"] = self.dims
|
||||
|
||||
return [
|
||||
self.client.embed_content(model=self.name, content=text, **kwargs)[
|
||||
@@ -131,6 +144,8 @@ class GeminiText(TextEmbeddingFunction):
|
||||
def client(self):
|
||||
genai = attempt_import_or_raise("google.generativeai", "google.generativeai")
|
||||
|
||||
if not os.environ.get("GOOGLE_API_KEY"):
|
||||
api_key = os.environ.get("GOOGLE_API_KEY")
|
||||
if not api_key:
|
||||
api_key_not_found_help("google")
|
||||
genai.configure(api_key=api_key)
|
||||
return genai
|
||||
|
||||
@@ -308,7 +308,7 @@ def test_instructor_embedding(tmp_path):
|
||||
os.environ.get("GOOGLE_API_KEY") is None, reason="GOOGLE_API_KEY not set"
|
||||
)
|
||||
def test_gemini_embedding(tmp_path):
|
||||
model = get_registry().get("gemini-text").create(max_retries=0)
|
||||
model = get_registry().get("gemini-text").create(max_retries=0, dims=512)
|
||||
|
||||
class TextModel(LanceModel):
|
||||
text: str = model.SourceField()
|
||||
@@ -319,7 +319,7 @@ def test_gemini_embedding(tmp_path):
|
||||
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
|
||||
|
||||
tbl.add(df)
|
||||
assert len(tbl.to_pandas()["vector"][0]) == model.ndims()
|
||||
assert len(tbl.to_pandas()["vector"][0]) == model.ndims() == 512
|
||||
assert tbl.search("hello").limit(1).to_pandas()["text"][0] == "hello world"
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.22.3-beta.1"
|
||||
version = "0.22.3-beta.2"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
@@ -32,7 +32,6 @@ snafu = { workspace = true }
|
||||
half = { workspace = true }
|
||||
lazy_static.workspace = true
|
||||
lance = { workspace = true }
|
||||
lance-arrow = { workspace = true }
|
||||
lance-core = { workspace = true }
|
||||
lance-datafusion.workspace = true
|
||||
lance-datagen = { workspace = true }
|
||||
@@ -43,6 +42,7 @@ lance-table = { workspace = true }
|
||||
lance-linalg = { workspace = true }
|
||||
lance-testing = { workspace = true }
|
||||
lance-encoding = { workspace = true }
|
||||
lance-arrow = { workspace = true }
|
||||
lance-namespace = { workspace = true }
|
||||
lance-namespace-impls = { workspace = true, features = ["dir", "rest"] }
|
||||
moka = { workspace = true }
|
||||
|
||||
@@ -1427,6 +1427,10 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
"NOT_SUPPORTED"
|
||||
}
|
||||
|
||||
async fn storage_options(&self) -> Option<HashMap<String, String>> {
|
||||
None
|
||||
}
|
||||
|
||||
async fn stats(&self) -> Result<TableStatistics> {
|
||||
let request = self
|
||||
.client
|
||||
|
||||
@@ -601,6 +601,8 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
|
||||
async fn table_definition(&self) -> Result<TableDefinition>;
|
||||
/// Get the table URI
|
||||
fn dataset_uri(&self) -> &str;
|
||||
/// Get the storage options used when opening this table, if any.
|
||||
async fn storage_options(&self) -> Option<HashMap<String, String>>;
|
||||
/// Poll until the columns are fully indexed. Will return Error::Timeout if the columns
|
||||
/// are not fully indexed within the timeout.
|
||||
async fn wait_for_index(
|
||||
@@ -1293,6 +1295,13 @@ impl Table {
|
||||
self.inner.dataset_uri()
|
||||
}
|
||||
|
||||
/// Get the storage options used when opening this table, if any.
|
||||
///
|
||||
/// Warning: This is an internal API and the return value is subject to change.
|
||||
pub async fn storage_options(&self) -> Option<HashMap<String, String>> {
|
||||
self.inner.storage_options().await
|
||||
}
|
||||
|
||||
/// Get statistics about an index.
|
||||
/// Returns None if the index does not exist.
|
||||
pub async fn index_stats(
|
||||
@@ -2617,6 +2626,14 @@ impl BaseTable for NativeTable {
|
||||
self.uri.as_str()
|
||||
}
|
||||
|
||||
async fn storage_options(&self) -> Option<HashMap<String, String>> {
|
||||
self.dataset
|
||||
.get()
|
||||
.await
|
||||
.ok()
|
||||
.and_then(|dataset| dataset.storage_options().cloned())
|
||||
}
|
||||
|
||||
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
|
||||
let stats = match self
|
||||
.dataset
|
||||
@@ -2626,7 +2643,7 @@ impl BaseTable for NativeTable {
|
||||
.await
|
||||
{
|
||||
Ok(stats) => stats,
|
||||
Err(lance::Error::IndexNotFound { .. }) => return Ok(None),
|
||||
Err(lance_core::Error::IndexNotFound { .. }) => return Ok(None),
|
||||
Err(e) => return Err(Error::from(e)),
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user