Files
lancedb/python/lancedb/conftest.py
Ayush Chaurasia 1589499f89 Exponential standoff retry support for handling rate limited embedding functions (#614)
Users ingesting data using rate limited apis don't need to manually make
the process sleep for counter rate limits
resolves #579
2023-11-02 19:20:10 +05:30

66 lines
1.8 KiB
Python

import os
import time
from typing import Any
import numpy as np
import pytest
from .embeddings import EmbeddingFunctionRegistry, TextEmbeddingFunction
# import lancedb so we don't have to in every example
@pytest.fixture(autouse=True)
def doctest_setup(monkeypatch, tmpdir):
# disable color for doctests so we don't have to include
# escape codes in docstrings
monkeypatch.setitem(os.environ, "NO_COLOR", "1")
# Explicitly set the column width
monkeypatch.setitem(os.environ, "COLUMNS", "80")
# Work in a temporary directory
monkeypatch.chdir(tmpdir)
registry = EmbeddingFunctionRegistry.get_instance()
@registry.register("test")
class MockTextEmbeddingFunction(TextEmbeddingFunction):
"""
Return the hash of the first 10 characters
"""
def generate_embeddings(self, texts):
return [self._compute_one_embedding(row) for row in texts]
def _compute_one_embedding(self, row):
emb = np.array([float(hash(c)) for c in row[:10]])
emb /= np.linalg.norm(emb)
return emb
def ndims(self):
return 10
class RateLimitedAPI:
rate_limit = 0.1 # 1 request per 0.1 second
last_request_time = 0
@staticmethod
def make_request():
current_time = time.time()
if current_time - RateLimitedAPI.last_request_time < RateLimitedAPI.rate_limit:
raise Exception("Rate limit exceeded. Please try again later.")
# Simulate a successful request
RateLimitedAPI.last_request_time = current_time
return "Request successful"
@registry.register("test-rate-limited")
class MockRateLimitedEmbeddingFunction(MockTextEmbeddingFunction):
def generate_embeddings(self, texts):
RateLimitedAPI.make_request()
return [self._compute_one_embedding(row) for row in texts]