From a405847f9b6f2feafda21dec2e68c6cc267943f3 Mon Sep 17 00:00:00 2001 From: Antonio Molner Domenech Date: Mon, 9 Sep 2024 20:35:53 +0100 Subject: [PATCH] fix(python): remove unmaintained ratelimiter dependency (#1603) The `ratelimiter` package hasn't been updated in ages and is no longer maintained. This PR removes the dependency on `ratelimiter` and replaces it with a custom rate limiter implementation. --------- Co-authored-by: Will Jones --- python/pyproject.toml | 1 - python/python/lancedb/embeddings/utils.py | 58 +++++++++++++++++------ python/python/tests/test_embeddings.py | 4 -- 3 files changed, 43 insertions(+), 20 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 7d41d891..31b3af2e 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,6 @@ name = "lancedb" dependencies = [ "deprecation", "pylance==0.17.0", - "ratelimiter~=1.0", "requests>=2.31.0", "retry>=0.9.2", "tqdm>=4.27.0", diff --git a/python/python/lancedb/embeddings/utils.py b/python/python/lancedb/embeddings/utils.py index 813631ca..ce5396d7 100644 --- a/python/python/lancedb/embeddings/utils.py +++ b/python/python/lancedb/embeddings/utils.py @@ -16,6 +16,7 @@ import math import random import socket import sys +import threading import time import urllib.error import weakref @@ -38,6 +39,42 @@ IMAGES = Union[ AUDIO = Union[str, bytes, List[str], List[bytes], pa.Array, pa.ChunkedArray, np.ndarray] +class RateLimiter: + def __init__(self, max_calls: int = 1, period: float = 1.0): + self.period = period + self.max_calls = max(1, min(sys.maxsize, math.floor(max_calls))) + + self._last_reset = time.time() + self._num_calls = 0 + self._lock = threading.RLock() + + def _check_sleep(self) -> float: + current_time = time.time() + elapsed = current_time - self._last_reset + period_remaining = self.period - elapsed + + # If the time window has elapsed then reset. + if period_remaining <= 0: + self._num_calls = 0 + self._last_reset = current_time + + self._num_calls += 1 + + if self._num_calls > self.max_calls: + return period_remaining + + return 0.0 + + def __call__(self, func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + with self._lock: + time.sleep(self._check_sleep()) + return func(*args, **kwargs) + + return wrapper + + @deprecated def with_embeddings( func: Callable, @@ -109,21 +146,12 @@ class FunctionWrapper: def embed_func(c): return self.func(c.tolist()) - if len(self.rate_limiter_kwargs) > 0: - v = int(sys.version_info.minor) - if v >= 11: - print( - "WARNING: rate limit only support up to 3.10, proceeding " - "without rate limiter" - ) - else: - import ratelimiter - - max_calls = self.rate_limiter_kwargs["max_calls"] - limiter = ratelimiter.RateLimiter( - max_calls, period=self.rate_limiter_kwargs["period"] - ) - embed_func = limiter(embed_func) + if self.rate_limiter_kwargs: + limiter = RateLimiter( + max_calls=self.rate_limiter_kwargs["max_calls"], + period=self.rate_limiter_kwargs["period"], + ) + embed_func = limiter(embed_func) batches = self.to_batches(text) embeds = [emb for c in batches for emb in embed_func(c)] return embeds diff --git a/python/python/tests/test_embeddings.py b/python/python/tests/test_embeddings.py index ed7b105a..05886699 100644 --- a/python/python/tests/test_embeddings.py +++ b/python/python/tests/test_embeddings.py @@ -10,7 +10,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import sys from typing import List, Union import lance @@ -35,9 +34,6 @@ def mock_embed_func(input_data): def test_with_embeddings(): for wrap_api in [True, False]: - if wrap_api and sys.version_info.minor >= 11: - # ratelimiter package doesn't work on 3.11 - continue data = pa.Table.from_arrays( [ pa.array(["foo", "bar"]),