From 4372c231cd1a9a6c54f36a949e49411ef26c47a6 Mon Sep 17 00:00:00 2001
From: BubbleCal
Date: Sat, 9 Nov 2024 00:48:07 +0800
Subject: [PATCH 1/6] feat: support optimize indices in sync API (#1769)
Signed-off-by: BubbleCal
---
python/python/lancedb/remote/table.py | 14 +++
python/python/lancedb/table.py | 131 +++++++++++++++++++++++++-
python/python/tests/test_table.py | 48 ++++++++++
3 files changed, 192 insertions(+), 1 deletion(-)
diff --git a/python/python/lancedb/remote/table.py b/python/python/lancedb/remote/table.py
index e2d88b98..a068af12 100644
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from datetime import timedelta
import asyncio
import logging
from functools import cached_property
@@ -478,6 +479,19 @@ class RemoteTable(Table):
"compact_files() is not supported on the LanceDB cloud"
)
+ def optimize(
+ self,
+ *,
+ cleanup_older_than: Optional[timedelta] = None,
+ delete_unverified: bool = False,
+ ):
+ """optimize() is not supported on the LanceDB cloud.
+ Indices are optimized automatically."""
+ raise NotImplementedError(
+ "optimize() is not supported on the LanceDB cloud. "
+ "Indices are optimized automatically."
+ )
+
def count_rows(self, filter: Optional[str] = None) -> int:
return self._loop.run_until_complete(self._table.count_rows(filter))
diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py
index 18e2c266..6403c88f 100644
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -3,6 +3,7 @@
from __future__ import annotations
+import asyncio
import inspect
import time
from abc import ABC, abstractmethod
@@ -32,7 +33,7 @@ import pyarrow.fs as pa_fs
from lance import LanceDataset
from lance.dependencies import _check_for_hugging_face
-from .common import DATA, VEC, VECTOR_COLUMN_NAME
+from .common import DATA, VEC, VECTOR_COLUMN_NAME, sanitize_uri
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
from .merge import LanceMergeInsertBuilder
from .pydantic import LanceModel, model_to_dict
@@ -57,6 +58,8 @@ from .util import (
)
from .index import lang_mapping
+from ._lancedb import connect as lancedb_connect
+
if TYPE_CHECKING:
import PIL
from lance.dataset import CleanupStats, ReaderLike
@@ -893,6 +896,55 @@ class Table(ABC):
For most cases, the default should be fine.
"""
+ @abstractmethod
+ def optimize(
+ self,
+ *,
+ cleanup_older_than: Optional[timedelta] = None,
+ delete_unverified: bool = False,
+ ):
+ """
+ Optimize the on-disk data and indices for better performance.
+
+ Modeled after ``VACUUM`` in PostgreSQL.
+
+ Optimization covers three operations:
+
+ * Compaction: Merges small files into larger ones
+ * Prune: Removes old versions of the dataset
+ * Index: Optimizes the indices, adding new data to existing indices
+
+ Parameters
+ ----------
+ cleanup_older_than: timedelta, optional default 7 days
+ All files belonging to versions older than this will be removed. Set
+ to 0 days to remove all versions except the latest. The latest version
+ is never removed.
+ delete_unverified: bool, default False
+ Files leftover from a failed transaction may appear to be part of an
+ in-progress operation (e.g. appending new data) and these files will not
+ be deleted unless they are at least 7 days old. If delete_unverified is True
+ then these files will be deleted regardless of their age.
+
+ Experimental API
+ ----------------
+
+ The optimization process is undergoing active development and may change.
+ Our goal with these changes is to improve the performance of optimization and
+ reduce the complexity.
+
+ That being said, it is essential today to run optimize if you want the best
+ performance. It should be stable and safe to use in production, but it is our
+ hope that the API may be simplified (or not even need to be called) in the
+ future.
+
+ The frequency an application should call optimize is based on the frequency of
+ data modifications. If data is frequently added, deleted, or updated then
+ optimize should be run frequently. A good rule of thumb is to run optimize if
+ you have added or modified 100,000 or more records or run more than 20 data
+ modification operations.
+ """
+
@abstractmethod
def add_columns(self, transforms: Dict[str, str]):
"""
@@ -1971,6 +2023,83 @@ class LanceTable(Table):
"""
return self.to_lance().optimize.compact_files(*args, **kwargs)
+ def optimize(
+ self,
+ *,
+ cleanup_older_than: Optional[timedelta] = None,
+ delete_unverified: bool = False,
+ ):
+ """
+ Optimize the on-disk data and indices for better performance.
+
+ Modeled after ``VACUUM`` in PostgreSQL.
+
+ Optimization covers three operations:
+
+ * Compaction: Merges small files into larger ones
+ * Prune: Removes old versions of the dataset
+ * Index: Optimizes the indices, adding new data to existing indices
+
+ Parameters
+ ----------
+ cleanup_older_than: timedelta, optional default 7 days
+ All files belonging to versions older than this will be removed. Set
+ to 0 days to remove all versions except the latest. The latest version
+ is never removed.
+ delete_unverified: bool, default False
+ Files leftover from a failed transaction may appear to be part of an
+ in-progress operation (e.g. appending new data) and these files will not
+ be deleted unless they are at least 7 days old. If delete_unverified is True
+ then these files will be deleted regardless of their age.
+
+ Experimental API
+ ----------------
+
+ The optimization process is undergoing active development and may change.
+ Our goal with these changes is to improve the performance of optimization and
+ reduce the complexity.
+
+ That being said, it is essential today to run optimize if you want the best
+ performance. It should be stable and safe to use in production, but it is our
+ hope that the API may be simplified (or not even need to be called) in the
+ future.
+
+ The frequency an application should call optimize is based on the frequency of
+ data modifications. If data is frequently added, deleted, or updated then
+ optimize should be run frequently. A good rule of thumb is to run optimize if
+ you have added or modified 100,000 or more records or run more than 20 data
+ modification operations.
+ """
+ try:
+ asyncio.get_running_loop()
+ raise AssertionError(
+ "Synchronous method called in asynchronous context. "
+ "If you are writing an asynchronous application "
+ "then please use the asynchronous APIs"
+ )
+
+ except RuntimeError:
+ asyncio.run(
+ self._async_optimize(
+ cleanup_older_than=cleanup_older_than,
+ delete_unverified=delete_unverified,
+ )
+ )
+ self.checkout_latest()
+
+ async def _async_optimize(
+ self,
+ cleanup_older_than: Optional[timedelta] = None,
+ delete_unverified: bool = False,
+ ):
+ conn = await lancedb_connect(
+ sanitize_uri(self._conn.uri),
+ )
+ table = AsyncTable(await conn.open_table(self.name))
+ return await table.optimize(
+ cleanup_older_than=cleanup_older_than, delete_unverified=delete_unverified
+ )
+
def add_columns(self, transforms: Dict[str, str]):
self._dataset_mut.add_columns(transforms)
diff --git a/python/python/tests/test_table.py b/python/python/tests/test_table.py
index bdf22ddf..7ed367cb 100644
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -1223,6 +1223,54 @@ async def test_time_travel(db_async: AsyncConnection):
await table.restore()
+def test_sync_optimize(db):
+ table = LanceTable.create(
+ db,
+ "test",
+ data=[
+ {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
+ {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
+ ],
+ )
+
+ table.create_scalar_index("price", index_type="BTREE")
+ stats = table.to_lance().stats.index_stats("price_idx")
+ assert stats["num_indexed_rows"] == 2
+
+ table.add([{"vector": [2.0, 2.0], "item": "baz", "price": 30.0}])
+ assert table.count_rows() == 3
+ table.optimize()
+ stats = table.to_lance().stats.index_stats("price_idx")
+ assert stats["num_indexed_rows"] == 3
+
+
+@pytest.mark.asyncio
+async def test_sync_optimize_in_async(db):
+ table = LanceTable.create(
+ db,
+ "test",
+ data=[
+ {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
+ {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
+ ],
+ )
+
+ table.create_scalar_index("price", index_type="BTREE")
+ stats = table.to_lance().stats.index_stats("price_idx")
+ assert stats["num_indexed_rows"] == 2
+
+ table.add([{"vector": [2.0, 2.0], "item": "baz", "price": 30.0}])
+ assert table.count_rows() == 3
+ try:
+ table.optimize()
+ except Exception as e:
+ assert (
+ "Synchronous method called in asynchronous context. "
+ "If you are writing an asynchronous application "
+ "then please use the asynchronous APIs" in str(e)
+ )
+
+
@pytest.mark.asyncio
async def test_optimize(db_async: AsyncConnection):
table = await db_async.create_table(
From 0ed77fa99064d56a2842edafa6faa6f7a1810952 Mon Sep 17 00:00:00 2001
From: BubbleCal
Date: Sat, 9 Nov 2024 01:07:43 +0800
Subject: [PATCH 2/6] chore: impl Debug & Clone for Index params (#1808)
We don't strictly need these traits in lancedb, but every field of `Index`
already implements both of them, so derive them to make it possible to use
`Index` wherever `Debug` or `Clone` is required.
Signed-off-by: BubbleCal
---
rust/lancedb/src/index.rs | 1 +
1 file changed, 1 insertion(+)
diff --git a/rust/lancedb/src/index.rs b/rust/lancedb/src/index.rs
index 6ec6249e..432e01c2 100644
--- a/rust/lancedb/src/index.rs
+++ b/rust/lancedb/src/index.rs
@@ -29,6 +29,7 @@ pub mod scalar;
pub mod vector;
/// Supported index types.
+#[derive(Debug, Clone)]
pub enum Index {
Auto,
/// A `BTree` index is an sorted index on scalar columns.
From 21021f94ca1e59345902c6dbc7303b724dabd393 Mon Sep 17 00:00:00 2001
From: Kursat Aktas
Date: Fri, 8 Nov 2024 21:55:22 +0300
Subject: [PATCH 3/6] docs: introducing LanceDB Guru on Gurubase.io (#1797)
Hello team,
I'm the maintainer of [Anteon](https://github.com/getanteon/anteon). We
have created Gurubase.io with the mission of building a centralized,
open-source tool-focused knowledge base. Essentially, each "guru" is
equipped with custom knowledge to answer user questions based on
collected data related to that tool.
I wanted to update you that I've manually added the [LanceDB
Guru](https://gurubase.io/g/lancedb) to Gurubase. LanceDB Guru uses the
data from this repo and data from the
[docs](https://lancedb.github.io/lancedb/) to answer questions by
leveraging the LLM.
In this PR, I showcased the "LanceDB Guru", which highlights that
LanceDB now has an AI assistant available to help users with their
questions. Please let me know your thoughts on this contribution.
Additionally, if you want me to disable LanceDB Guru in Gurubase, just
let me know that's totally fine.
Signed-off-by: Kursat Aktas
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index fa1218f1..fc0aa217 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@
[](https://blog.lancedb.com/)
[](https://discord.gg/zMM32dvNtd)
[](https://twitter.com/lancedb)
+[](https://gurubase.io/g/lancedb)
From cbbc07d0f523b4f8efd2c9fcf402acac8e283f07 Mon Sep 17 00:00:00 2001
From: fzowl <160063452+fzowl@users.noreply.github.com>
Date: Fri, 8 Nov 2024 20:21:20 +0100
Subject: [PATCH 4/6] feat: voyageai support (#1799)
Adding VoyageAI embedding and rerank support
---
.../voyageai_embedding.md | 51 +++++++
docs/src/reranking/voyageai.md | 77 ++++++++++
python/python/lancedb/embeddings/__init__.py | 1 +
python/python/lancedb/embeddings/voyageai.py | 127 +++++++++++++++++
python/python/lancedb/rerankers/__init__.py | 2 +
python/python/lancedb/rerankers/voyageai.py | 133 ++++++++++++++++++
python/python/tests/test_embeddings.py | 1 +
python/python/tests/test_embeddings_slow.py | 19 +++
python/python/tests/test_rerankers.py | 12 ++
9 files changed, 423 insertions(+)
create mode 100644 docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
create mode 100644 docs/src/reranking/voyageai.md
create mode 100644 python/python/lancedb/embeddings/voyageai.py
create mode 100644 python/python/lancedb/rerankers/voyageai.py
diff --git a/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md b/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
new file mode 100644
index 00000000..41a6be31
--- /dev/null
+++ b/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
@@ -0,0 +1,51 @@
+# VoyageAI Embeddings
+
+Voyage AI provides cutting-edge embedding models and rerankers.
+
+
+Using the VoyageAI API requires the `voyageai` package, which can be installed using `pip install voyageai`. Voyage AI embeddings are used to generate embeddings for text data. The embeddings can be used for various tasks like semantic search, clustering, and classification.
+You also need to set the `VOYAGE_API_KEY` environment variable to use the VoyageAI API.
+
+Supported models are:
+
+- voyage-3
+- voyage-3-lite
+- voyage-finance-2
+- voyage-multilingual-2
+- voyage-law-2
+- voyage-code-2
+
+
+Supported parameters (to be passed in `create` method) are:
+
+| Parameter | Type | Default Value | Description |
+|---|---|--------|---------|
+| `name` | `str` | `"voyage-3"` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
+| `input_type` | `str` | `None` | Type of the input text. Default to None. Other options: query, document. |
+| `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |
+
+
+Usage Example:
+
+```python
+ import lancedb
+ from lancedb.pydantic import LanceModel, Vector
+ from lancedb.embeddings import EmbeddingFunctionRegistry
+
+ voyageai = (EmbeddingFunctionRegistry
+ .get_instance()
+ .get("voyageai")
+ .create(name="voyage-3"))
+
+ class TextModel(LanceModel):
+ text: str = voyageai.SourceField()
+ vector: Vector(voyageai.ndims()) = voyageai.VectorField()
+
+ data = [ { "text": "hello world" },
+ { "text": "goodbye world" }]
+
+ db = lancedb.connect("~/.lancedb")
+ tbl = db.create_table("test", schema=TextModel, mode="overwrite")
+
+ tbl.add(data)
+```
\ No newline at end of file
diff --git a/docs/src/reranking/voyageai.md b/docs/src/reranking/voyageai.md
new file mode 100644
index 00000000..4021729a
--- /dev/null
+++ b/docs/src/reranking/voyageai.md
@@ -0,0 +1,77 @@
+# Voyage AI Reranker
+
+Voyage AI provides cutting-edge embedding models and rerankers.
+
+This re-ranker uses the [VoyageAI](https://docs.voyageai.com/docs/) API to rerank the search results. You can use this re-ranker by passing `VoyageAIReranker()` to the `rerank()` method. Note that you'll either need to set the `VOYAGE_API_KEY` environment variable or pass the `api_key` argument to use this re-ranker.
+
+
+!!! note
+ Supported Query Types: Hybrid, Vector, FTS
+
+
+```python
+import numpy
+import lancedb
+from lancedb.embeddings import get_registry
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.rerankers import VoyageAIReranker
+
+embedder = get_registry().get("sentence-transformers").create()
+db = lancedb.connect("~/.lancedb")
+
+class Schema(LanceModel):
+ text: str = embedder.SourceField()
+ vector: Vector(embedder.ndims()) = embedder.VectorField()
+
+data = [
+ {"text": "hello world"},
+ {"text": "goodbye world"}
+ ]
+tbl = db.create_table("test", schema=Schema, mode="overwrite")
+tbl.add(data)
+reranker = VoyageAIReranker(model_name="rerank-2")
+
+# Run vector search with a reranker
+result = tbl.search("hello").rerank(reranker=reranker).to_list()
+
+# Run FTS search with a reranker
+result = tbl.search("hello", query_type="fts").rerank(reranker=reranker).to_list()
+
+# Run hybrid search with a reranker
+tbl.create_fts_index("text", replace=True)
+result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list()
+
+```
+
+Accepted Arguments
+----------------
+| Argument | Type | Default | Description |
+| --- | --- | --- | --- |
+| `model_name` | `str` | (required) | The name of the reranker model to use. Available models are: rerank-2, rerank-2-lite |
+| `column` | `str` | `"text"` | The name of the column to use as input to the cross encoder model. |
+| `top_n` | `int` | `None` | The number of results to return. If None, will return all results. |
+| `api_key` | `str` | `None` | The API key for the Voyage AI API. If not provided, the `VOYAGE_API_KEY` environment variable is used. |
+| `return_score` | `str` | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score`. If "all" is supported, will return relevance score along with the vector and/or fts scores depending on query type |
+| `truncation` | `bool` | `True` | Whether to truncate the input to satisfy the "context length limit" on the query and the documents. |
+
+
+## Supported Scores for each query type
+You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type:
+
+### Hybrid Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Results only have the `_relevance_score` column |
+| `all` | ❌ Not Supported | Results have the vector (`_distance`) and FTS (`_score`) scores along with the reranker score (`_relevance_score`) |
+
+### Vector Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Results only have the `_relevance_score` column |
+| `all` | ✅ Supported | Results have the vector distance (`_distance`) along with the reranker score (`_relevance_score`) |
+
+### FTS Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Results only have the `_relevance_score` column |
+| `all` | ✅ Supported | Results have the FTS score (`_score`) along with the reranker score (`_relevance_score`) |
\ No newline at end of file
diff --git a/python/python/lancedb/embeddings/__init__.py b/python/python/lancedb/embeddings/__init__.py
index 76da3ab4..afa127d7 100644
--- a/python/python/lancedb/embeddings/__init__.py
+++ b/python/python/lancedb/embeddings/__init__.py
@@ -27,3 +27,4 @@ from .imagebind import ImageBindEmbeddings
from .utils import with_embeddings
from .jinaai import JinaEmbeddings
from .watsonx import WatsonxEmbeddings
+from .voyageai import VoyageAIEmbeddingFunction
diff --git a/python/python/lancedb/embeddings/voyageai.py b/python/python/lancedb/embeddings/voyageai.py
new file mode 100644
index 00000000..161c5e43
--- /dev/null
+++ b/python/python/lancedb/embeddings/voyageai.py
@@ -0,0 +1,127 @@
+# Copyright (c) 2023. LanceDB Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import ClassVar, List, Union
+
+import numpy as np
+
+from ..util import attempt_import_or_raise
+from .base import TextEmbeddingFunction
+from .registry import register
+from .utils import api_key_not_found_help, TEXT
+
+
+@register("voyageai")
+class VoyageAIEmbeddingFunction(TextEmbeddingFunction):
+ """
+ An embedding function that uses the VoyageAI API
+
+ https://docs.voyageai.com/docs/embeddings
+
+ Parameters
+ ----------
+ name: str
+ The name of the model to use. List of acceptable models:
+
+ * voyage-3
+ * voyage-3-lite
+ * voyage-finance-2
+ * voyage-multilingual-2
+ * voyage-law-2
+ * voyage-code-2
+
+
+ Examples
+ --------
+ import lancedb
+ from lancedb.pydantic import LanceModel, Vector
+ from lancedb.embeddings import EmbeddingFunctionRegistry
+
+ voyageai = (EmbeddingFunctionRegistry
+ .get_instance()
+ .get("voyageai")
+ .create(name="voyage-3"))
+
+ class TextModel(LanceModel):
+ text: str = voyageai.SourceField()
+ vector: Vector(voyageai.ndims()) = voyageai.VectorField()
+
+ data = [ { "text": "hello world" },
+ { "text": "goodbye world" }]
+
+ db = lancedb.connect("~/.lancedb")
+ tbl = db.create_table("test", schema=TextModel, mode="overwrite")
+
+ tbl.add(data)
+
+ """
+
+ name: str
+ client: ClassVar = None
+
+ def ndims(self):
+ if self.name == "voyage-3-lite":
+ return 512
+ elif self.name == "voyage-code-2":
+ return 1536
+ elif self.name in [
+ "voyage-3",
+ "voyage-finance-2",
+ "voyage-multilingual-2",
+ "voyage-law-2",
+ ]:
+ return 1024
+ else:
+ raise ValueError(f"Model {self.name} not supported")
+
+ def compute_query_embeddings(self, query: str, *args, **kwargs) -> List[np.array]:
+ return self.compute_source_embeddings(query, input_type="query")
+
+ def compute_source_embeddings(self, texts: TEXT, *args, **kwargs) -> List[np.array]:
+ texts = self.sanitize_input(texts)
+ input_type = (
+ kwargs.get("input_type") or "document"
+ ) # assume source input type if not passed by `compute_query_embeddings`
+ return self.generate_embeddings(texts, input_type=input_type)
+
+ def generate_embeddings(
+ self, texts: Union[List[str], np.ndarray], *args, **kwargs
+ ) -> List[np.array]:
+ """
+ Get the embeddings for the given texts
+
+ Parameters
+ ----------
+ texts: list[str] or np.ndarray (of str)
+ The texts to embed
+ input_type: Optional[str]
+
+ truncation: Optional[bool]
+ """
+ VoyageAIEmbeddingFunction._init_client()
+ rs = VoyageAIEmbeddingFunction.client.embed(
+ texts=texts, model=self.name, **kwargs
+ )
+
+ return [emb for emb in rs.embeddings]
+
+ @staticmethod
+ def _init_client():
+ if VoyageAIEmbeddingFunction.client is None:
+ voyageai = attempt_import_or_raise("voyageai")
+ if os.environ.get("VOYAGE_API_KEY") is None:
+ api_key_not_found_help("voyageai")
+ VoyageAIEmbeddingFunction.client = voyageai.Client(
+ os.environ["VOYAGE_API_KEY"]
+ )
diff --git a/python/python/lancedb/rerankers/__init__.py b/python/python/lancedb/rerankers/__init__.py
index 93903a16..c3e27331 100644
--- a/python/python/lancedb/rerankers/__init__.py
+++ b/python/python/lancedb/rerankers/__init__.py
@@ -7,6 +7,7 @@ from .openai import OpenaiReranker
from .jinaai import JinaReranker
from .rrf import RRFReranker
from .answerdotai import AnswerdotaiRerankers
+from .voyageai import VoyageAIReranker
__all__ = [
"Reranker",
@@ -18,4 +19,5 @@ __all__ = [
"JinaReranker",
"RRFReranker",
"AnswerdotaiRerankers",
+ "VoyageAIReranker",
]
diff --git a/python/python/lancedb/rerankers/voyageai.py b/python/python/lancedb/rerankers/voyageai.py
new file mode 100644
index 00000000..d04a5ad4
--- /dev/null
+++ b/python/python/lancedb/rerankers/voyageai.py
@@ -0,0 +1,133 @@
+# Copyright (c) 2023. LanceDB Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from functools import cached_property
+from typing import Union, Optional
+
+import pyarrow as pa
+
+from ..util import attempt_import_or_raise
+from .base import Reranker
+
+
+class VoyageAIReranker(Reranker):
+ """
+ Reranks the results using the VoyageAI Rerank API.
+ https://docs.voyageai.com/docs/reranker
+
+ Parameters
+ ----------
+ model_name : str
+ The name of the cross encoder model to use. Available voyageai models are:
+ - rerank-2
+ - rerank-2-lite
+ column : str, default "text"
+ The name of the column to use as input to the cross encoder model.
+ top_n : int, default None
+ The number of results to return. If None, will return all results.
+ return_score : str, default "relevance"
+ options are "relevance" or "all". Only "relevance" is supported for now.
+ api_key : str, default None
+ The API key to use. If None, will use the VOYAGE_API_KEY environment variable.
+ truncation : Optional[bool], default True
+ """
+
+ def __init__(
+ self,
+ model_name: str,
+ column: str = "text",
+ top_n: Optional[int] = None,
+ return_score="relevance",
+ api_key: Optional[str] = None,
+ truncation: Optional[bool] = True,
+ ):
+ super().__init__(return_score)
+ self.model_name = model_name
+ self.column = column
+ self.top_n = top_n
+ self.api_key = api_key
+ self.truncation = truncation
+
+ @cached_property
+ def _client(self):
+ voyageai = attempt_import_or_raise("voyageai")
+ if os.environ.get("VOYAGE_API_KEY") is None and self.api_key is None:
+ raise ValueError(
+ "VOYAGE_API_KEY not set. Either set it in your environment or \
+ pass it as `api_key` argument to the VoyageAIReranker."
+ )
+ return voyageai.Client(
+ api_key=os.environ.get("VOYAGE_API_KEY") or self.api_key,
+ )
+
+ def _rerank(self, result_set: pa.Table, query: str):
+ docs = result_set[self.column].to_pylist()
+ response = self._client.rerank(
+ query=query,
+ documents=docs,
+ top_k=self.top_n,
+ model=self.model_name,
+ truncation=self.truncation,
+ )
+ results = (
+ response.results
+ ) # returns list (text, idx, relevance) attributes sorted descending by score
+ indices, scores = list(
+ zip(*[(result.index, result.relevance_score) for result in results])
+ ) # tuples
+ result_set = result_set.take(list(indices))
+ # add the scores
+ result_set = result_set.append_column(
+ "_relevance_score", pa.array(scores, type=pa.float32())
+ )
+
+ return result_set
+
+ def rerank_hybrid(
+ self,
+ query: str,
+ vector_results: pa.Table,
+ fts_results: pa.Table,
+ ):
+ combined_results = self.merge_results(vector_results, fts_results)
+ combined_results = self._rerank(combined_results, query)
+ if self.score == "relevance":
+ combined_results = self._keep_relevance_score(combined_results)
+ elif self.score == "all":
+ raise NotImplementedError(
+ "return_score='all' not implemented for voyageai reranker"
+ )
+ return combined_results
+
+ def rerank_vector(
+ self,
+ query: str,
+ vector_results: pa.Table,
+ ):
+ result_set = self._rerank(vector_results, query)
+ if self.score == "relevance":
+ result_set = result_set.drop_columns(["_distance"])
+
+ return result_set
+
+ def rerank_fts(
+ self,
+ query: str,
+ fts_results: pa.Table,
+ ):
+ result_set = self._rerank(fts_results, query)
+ if self.score == "relevance":
+ result_set = result_set.drop_columns(["_score"])
+
+ return result_set
diff --git a/python/python/tests/test_embeddings.py b/python/python/tests/test_embeddings.py
index e48fb209..a9f939ee 100644
--- a/python/python/tests/test_embeddings.py
+++ b/python/python/tests/test_embeddings.py
@@ -196,6 +196,7 @@ def test_add_optional_vector(tmp_path):
"ollama",
"cohere",
"instructor",
+ "voyageai",
],
)
def test_embedding_function_safe_model_dump(embedding_type):
diff --git a/python/python/tests/test_embeddings_slow.py b/python/python/tests/test_embeddings_slow.py
index 9e17ca66..58f9ff98 100644
--- a/python/python/tests/test_embeddings_slow.py
+++ b/python/python/tests/test_embeddings_slow.py
@@ -481,3 +481,22 @@ def test_ollama_embedding(tmp_path):
json.dumps(dumped_model)
except TypeError:
pytest.fail("Failed to JSON serialize the dumped model")
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(
+ os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
+)
+def test_voyageai_embedding_function():
+ voyageai = get_registry().get("voyageai").create(name="voyage-3", max_retries=0)
+
+ class TextModel(LanceModel):
+ text: str = voyageai.SourceField()
+ vector: Vector(voyageai.ndims()) = voyageai.VectorField()
+
+ df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
+ db = lancedb.connect("~/lancedb")
+ tbl = db.create_table("test", schema=TextModel, mode="overwrite")
+
+ tbl.add(df)
+ assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
diff --git a/python/python/tests/test_rerankers.py b/python/python/tests/test_rerankers.py
index f2f7c6cc..4e1c6898 100644
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -16,6 +16,7 @@ from lancedb.rerankers import (
OpenaiReranker,
JinaReranker,
AnswerdotaiRerankers,
+ VoyageAIReranker,
)
from lancedb.table import LanceTable
@@ -344,3 +345,14 @@ def test_jina_reranker(tmp_path, use_tantivy):
table, schema = get_test_table(tmp_path, use_tantivy)
reranker = JinaReranker()
_run_test_reranker(reranker, table, "single player experience", None, schema)
+
+
+@pytest.mark.skipif(
+ os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
+)
+@pytest.mark.parametrize("use_tantivy", [True, False])
+def test_voyageai_reranker(tmp_path, use_tantivy):
+ pytest.importorskip("voyageai")
+ reranker = VoyageAIReranker(model_name="rerank-2")
+ table, schema = get_test_table(tmp_path, use_tantivy)
+ _run_test_reranker(reranker, table, "single player experience", None, schema)
From b1c84e0bda3442403d7e2943c8dae8f7ed832f86 Mon Sep 17 00:00:00 2001
From: Umut Hope YILDIRIM
Date: Fri, 8 Nov 2024 14:40:57 -0500
Subject: [PATCH 5/6] feat: added lancedb and vectordb release ci for
win32-arm64-msvc npmjs only (#1805)
---
.github/workflows/npm-publish.yml | 216 ++++++++++++++++++++++-
nodejs/Cargo.toml | 2 +-
nodejs/npm/win32-arm64-msvc/package.json | 2 +-
3 files changed, 216 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/npm-publish.yml b/.github/workflows/npm-publish.yml
index 18b78dbf..0360b19f 100644
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -226,6 +226,112 @@ jobs:
path: |
node/dist/lancedb-vectordb-win32*.tgz
+ node-windows-arm64: # builds the vectordb native module for win32-arm64-msvc; required by the `release` job
+ name: vectordb win32-arm64-msvc
+ runs-on: windows-4x-arm
+ if: startsWith(github.ref, 'refs/tags/v') # run only for refs under refs/tags/v
+ steps:
+ - uses: actions/checkout@v4
+ - name: Cache installations
+ id: cache-installs # referenced by the cache-hit conditions on the install steps below
+ uses: actions/cache@v4
+ with:
+ path: |
+ C:\Program Files\Git
+ C:\BuildTools
+ C:\Program Files (x86)\Windows Kits
+ C:\Program Files\7-Zip
+ C:\protoc
+ key: ${{ runner.os }}-arm64-installs-v1
+ restore-keys: |
+ ${{ runner.os }}-arm64-installs-
+ - name: Install Git
+ if: steps.cache-installs.outputs.cache-hit != 'true' # skip when the cache step reported a hit
+ run: |
+ Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
+ Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
+ shell: powershell
+ - name: Add Git to PATH # entries appended to GITHUB_PATH apply to later steps
+ run: |
+ Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
+ $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+ shell: powershell
+ - name: Configure Git symlinks
+ run: git config --global core.symlinks true
+ - uses: actions/checkout@v4 # NOTE(review): second checkout in this job; presumably repeated so the tree is checked out with core.symlinks enabled - confirm
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.13"
+ - name: Install Visual Studio Build Tools
+ if: steps.cache-installs.outputs.cache-hit != 'true' # skip when the cache step reported a hit
+ run: |
+ Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
+ Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
+ "--installPath", "C:\BuildTools", `
+ "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
+ "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
+ "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
+ "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
+ "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
+ "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
+ shell: powershell
+ - name: Add Visual Studio Build Tools to PATH # also exports LIB through GITHUB_ENV
+ run: |
+ $vsPath = "C:\BuildTools\VC\Tools\MSVC"
+ $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
+ Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
+ Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
+ Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
+ Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
+ Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
+
+ $env:LIB = ""
+ Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
+ shell: powershell
+ - name: Install Rust
+ run: |
+ Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
+ .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc # x86_64 installer binary; default host is set to the ARM64 MSVC triple
+ shell: powershell
+ - name: Add Rust to PATH
+ run: |
+ Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
+ shell: powershell
+
+ - uses: Swatinem/rust-cache@v2
+ with:
+ workspaces: rust
+ - name: Install 7-Zip ARM
+ if: steps.cache-installs.outputs.cache-hit != 'true' # skip when the cache step reported a hit
+ run: |
+ New-Item -Path 'C:\7zip' -ItemType Directory
+ Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
+ Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
+ shell: powershell
+ - name: Add 7-Zip to PATH
+ run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
+ shell: powershell
+ - name: Install Protoc v21.12
+ if: steps.cache-installs.outputs.cache-hit != 'true'
+ working-directory: C:\
+ run: |
+ New-Item -Path 'C:\protoc' -ItemType Directory
+ Set-Location C:\protoc
+ Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
+ & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
+ shell: powershell
+ - name: Add Protoc to PATH
+ run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
+ shell: powershell
+ - name: Build Windows native node modules
+ run: .\ci\build_windows_artifacts.ps1 aarch64-pc-windows-msvc
+ - name: Upload Windows ARM64 Artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: node-native-windows-arm64
+ path: |
+ node/dist/*.node
+
nodejs-windows:
name: lancedb ${{ matrix.target }}
runs-on: windows-2022
@@ -260,9 +366,115 @@ jobs:
path: |
nodejs/dist/*.node
+ nodejs-windows-arm64: # builds the lancedb native module for win32-arm64-msvc; required by the `release-nodejs` job
+ name: lancedb win32-arm64-msvc
+ runs-on: windows-4x-arm
+ if: startsWith(github.ref, 'refs/tags/v') # run only for refs under refs/tags/v
+ steps:
+ - uses: actions/checkout@v4
+ - name: Cache installations
+ id: cache-installs # referenced by the cache-hit conditions on the install steps below
+ uses: actions/cache@v4
+ with:
+ path: |
+ C:\Program Files\Git
+ C:\BuildTools
+ C:\Program Files (x86)\Windows Kits
+ C:\Program Files\7-Zip
+ C:\protoc
+ key: ${{ runner.os }}-arm64-installs-v1
+ restore-keys: |
+ ${{ runner.os }}-arm64-installs-
+ - name: Install Git
+ if: steps.cache-installs.outputs.cache-hit != 'true' # skip when the cache step reported a hit
+ run: |
+ Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
+ Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
+ shell: powershell
+ - name: Add Git to PATH # entries appended to GITHUB_PATH apply to later steps
+ run: |
+ Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
+ $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+ shell: powershell
+ - name: Configure Git symlinks
+ run: git config --global core.symlinks true
+ - uses: actions/checkout@v4 # NOTE(review): second checkout in this job; presumably repeated so the tree is checked out with core.symlinks enabled - confirm
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.13"
+ - name: Install Visual Studio Build Tools
+ if: steps.cache-installs.outputs.cache-hit != 'true' # skip when the cache step reported a hit
+ run: |
+ Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
+ Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
+ "--installPath", "C:\BuildTools", `
+ "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
+ "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
+ "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
+ "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
+ "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
+ "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
+ shell: powershell
+ - name: Add Visual Studio Build Tools to PATH # also exports LIB through GITHUB_ENV
+ run: |
+ $vsPath = "C:\BuildTools\VC\Tools\MSVC"
+ $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
+ Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
+ Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
+ Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
+ Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
+ Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
+
+ $env:LIB = ""
+ Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
+ shell: powershell
+ - name: Install Rust
+ run: |
+ Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
+ .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc # x86_64 installer binary; default host is set to the ARM64 MSVC triple
+ shell: powershell
+ - name: Add Rust to PATH
+ run: |
+ Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
+ shell: powershell
+
+ - uses: Swatinem/rust-cache@v2
+ with:
+ workspaces: rust
+ - name: Install 7-Zip ARM
+ if: steps.cache-installs.outputs.cache-hit != 'true' # skip when the cache step reported a hit
+ run: |
+ New-Item -Path 'C:\7zip' -ItemType Directory
+ Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
+ Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
+ shell: powershell
+ - name: Add 7-Zip to PATH
+ run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
+ shell: powershell
+ - name: Install Protoc v21.12
+ if: steps.cache-installs.outputs.cache-hit != 'true'
+ working-directory: C:\
+ run: |
+ New-Item -Path 'C:\protoc' -ItemType Directory
+ Set-Location C:\protoc
+ Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
+ & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
+ shell: powershell
+ - name: Add Protoc to PATH
+ run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
+ shell: powershell
+ - name: Build Windows native node modules
+ run: .\ci\build_windows_artifacts_nodejs.ps1 aarch64-pc-windows-msvc
+ - name: Upload Windows ARM64 Artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: nodejs-native-windows-arm64
+ path: |
+ nodejs/dist/*.node
+
release:
name: vectordb NPM Publish
- needs: [node, node-macos, node-linux, node-windows]
+ needs: [node, node-macos, node-linux, node-windows, node-windows-arm64]
runs-on: ubuntu-latest
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
@@ -302,7 +514,7 @@ jobs:
release-nodejs:
name: lancedb NPM Publish
- needs: [nodejs-macos, nodejs-linux, nodejs-windows]
+ needs: [nodejs-macos, nodejs-linux, nodejs-windows, nodejs-windows-arm64]
runs-on: ubuntu-latest
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
diff --git a/nodejs/Cargo.toml b/nodejs/Cargo.toml
index ba7af8da..f2a79408 100644
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -18,7 +18,7 @@ futures.workspace = true
lancedb = { path = "../rust/lancedb", features = ["remote"] }
napi = { version = "2.16.8", default-features = false, features = [
"napi9",
- "async",
+ "async"
] }
napi-derive = "2.16.4"
# Prevent dynamic linking of lzma, which comes from datafusion
diff --git a/nodejs/npm/win32-arm64-msvc/package.json b/nodejs/npm/win32-arm64-msvc/package.json
index 0478cef7..e35e6e08 100644
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
- "version": "0.12.0",
+ "version": "0.13.0-beta.1",
"os": [
"win32"
],
From 729718cb09fe7d93338380ca7e1ed7905497cb6c Mon Sep 17 00:00:00 2001
From: Umut Hope YILDIRIM
Date: Fri, 8 Nov 2024 17:49:37 -0500
Subject: [PATCH 6/6] fix: arm64 runner proto already installed bug (#1810)
https://github.com/lancedb/lancedb/actions/runs/11748512661/job/32732745458
---
.github/workflows/npm-publish.yml | 22 ++++++++++++++++++++--
.github/workflows/rust.yml | 18 ++++++++++++++++--
2 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/npm-publish.yml b/.github/workflows/npm-publish.yml
index 0360b19f..db7b6d08 100644
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -285,8 +285,18 @@ jobs:
Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
- $env:LIB = ""
- Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
+ # Add MSVC runtime libraries to LIB
+ $env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" +
+ "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
+ "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
+ Add-Content $env:GITHUB_ENV "LIB=$env:LIB"
+
+ # Add INCLUDE paths
+ $env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
+ "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
+ "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
+ "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
+ Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
shell: powershell
- name: Install Rust
run: |
@@ -315,6 +325,10 @@ jobs:
if: steps.cache-installs.outputs.cache-hit != 'true'
working-directory: C:\
run: |
+ if (Test-Path 'C:\protoc') {
+ Write-Host "Protoc directory exists, skipping installation"
+ return
+ }
New-Item -Path 'C:\protoc' -ItemType Directory
Set-Location C:\protoc
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
@@ -455,6 +469,10 @@ jobs:
if: steps.cache-installs.outputs.cache-hit != 'true'
working-directory: C:\
run: |
+ if (Test-Path 'C:\protoc') {
+ Write-Host "Protoc directory exists, skipping installation"
+ return
+ }
New-Item -Path 'C:\protoc' -ItemType Directory
Set-Location C:\protoc
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index ab02b499..29e47ecc 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -195,8 +195,18 @@ jobs:
Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
- $env:LIB = ""
- Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
+ # Add MSVC runtime libraries to LIB
+ $env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" +
+ "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
+ "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
+ Add-Content $env:GITHUB_ENV "LIB=$env:LIB"
+
+ # Add INCLUDE paths
+ $env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
+ "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
+ "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
+ "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
+ Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
shell: powershell
- name: Install Rust
run: |
@@ -225,6 +235,10 @@ jobs:
if: steps.cache-installs.outputs.cache-hit != 'true'
working-directory: C:\
run: |
+ if (Test-Path 'C:\protoc') {
+ Write-Host "Protoc directory exists, skipping installation"
+ return
+ }
New-Item -Path 'C:\protoc' -ItemType Directory
Set-Location C:\protoc
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip