From 38eb05f29716f92f64c0664cbcfd2c46ffc3cf7e Mon Sep 17 00:00:00 2001 From: James Wu Date: Tue, 15 Oct 2024 15:13:57 -0700 Subject: [PATCH] fix(python): remove dependency on retry package (#1749) ## user story fixes https://github.com/lancedb/lancedb/issues/1480 https://github.com/invl/retry has not had an update in 8 years, and one of its sub-dependencies via requirements.txt (https://github.com/pytest-dev/py) is no longer maintained and has a high-severity vulnerability (CVE-2022-42969). retry is only used for a single function in the python codebase for a deprecated helper function `with_embeddings`, which was created for an older tutorial (https://github.com/lancedb/lancedb/pull/12) [but is now deprecated](https://lancedb.github.io/lancedb/embeddings/legacy/). ## changes I backported a limited range of functionality of the `@retry()` decorator directly into lancedb so that we no longer have a dependency on the `retry` package. ## tests ``` /Users/james/src/lancedb/python $ ruff check . All checks passed! /Users/james/src/lancedb/python $ pytest python/tests/test_embeddings.py python/tests/test_embeddings.py .......s.... 
[100%] ================================================================ 11 passed, 1 skipped, 2 warnings in 7.08s ================================================================ ``` --- python/pyproject.toml | 1 - python/python/lancedb/embeddings/utils.py | 23 ++++++++++++++++++++++- python/python/tests/test_embeddings.py | 11 +++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 2bdc665e..394cc4a4 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -5,7 +5,6 @@ dependencies = [ "deprecation", "pylance==0.18.2", "requests>=2.31.0", - "retry>=0.9.2", "tqdm>=4.27.0", "pydantic>=1.10", "attrs>=21.3.0", diff --git a/python/python/lancedb/embeddings/utils.py b/python/python/lancedb/embeddings/utils.py index ce5396d7..59dcdb9b 100644 --- a/python/python/lancedb/embeddings/utils.py +++ b/python/python/lancedb/embeddings/utils.py @@ -21,14 +21,35 @@ import time import urllib.error import weakref import logging +from functools import wraps from typing import Callable, List, Union import numpy as np import pyarrow as pa from lance.vector import vec_to_table -from retry import retry from ..util import deprecated, safe_import_pandas + +# ruff: noqa: PERF203 +def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1): + def wrapper(fn): + @wraps(fn) + def wrapped(*args, **kwargs): + for i in range(tries): + try: + return fn(*args, **kwargs) + except Exception: + if i + 1 == tries: + raise + else: + sleep = min(delay * (backoff**i) + jitter, max_delay) + time.sleep(sleep) + + return wrapped + + return wrapper + + pd = safe_import_pandas() DATA = Union[pa.Table, "pd.DataFrame"] diff --git a/python/python/tests/test_embeddings.py b/python/python/tests/test_embeddings.py index 9611f0ec..e48fb209 100644 --- a/python/python/tests/test_embeddings.py +++ b/python/python/tests/test_embeddings.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations 
under the License. from typing import List, Union +from unittest.mock import MagicMock, patch import lance import lancedb @@ -25,6 +26,7 @@ from lancedb.embeddings import ( ) from lancedb.embeddings.base import TextEmbeddingFunction from lancedb.embeddings.registry import get_registry, register +from lancedb.embeddings.utils import retry from lancedb.pydantic import LanceModel, Vector @@ -225,3 +227,12 @@ def test_embedding_function_safe_model_dump(embedding_type): f"{embedding_type}: Private attribute '{key}' " f"is present in dumped model" ) + + +@patch("time.sleep") +def test_retry(mock_sleep): + test_function = MagicMock(side_effect=[Exception] * 9 + ["result"]) + test_function = retry()(test_function) + result = test_function() + assert mock_sleep.call_count == 9 + assert result == "result"