From 9f85d4c639fd830ae57a17721dea703b1e8fd144 Mon Sep 17 00:00:00 2001
From: lennylxx
Date: Mon, 30 Mar 2026 12:03:44 -0700
Subject: [PATCH] fix(embeddings): add missing urllib.request import in url_retrieve (#3190)

url_retrieve() calls urllib.request.urlopen() but only urllib.error was
imported, causing AttributeError for any HTTP URL input. This affects
open-clip, siglip, and jinaai embedding functions when processing image
URLs. The bug has existed since the embeddings API refactor (#580) but
was masked because most users pass local file paths or bytes rather than
HTTP URLs.
---
 python/python/lancedb/embeddings/utils.py |  1 +
 python/python/tests/test_embeddings.py    | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/python/python/lancedb/embeddings/utils.py b/python/python/lancedb/embeddings/utils.py
index 1fefc78bf..189bbe53c 100644
--- a/python/python/lancedb/embeddings/utils.py
+++ b/python/python/lancedb/embeddings/utils.py
@@ -10,6 +10,7 @@ import sys
 import threading
 import time
 import urllib.error
+import urllib.request
 import weakref
 import logging
 from functools import wraps
diff --git a/python/python/tests/test_embeddings.py b/python/python/tests/test_embeddings.py
index c78b822f1..3f2c1cba8 100644
--- a/python/python/tests/test_embeddings.py
+++ b/python/python/tests/test_embeddings.py
@@ -546,3 +546,23 @@ def test_openai_no_retry_on_401(mock_sleep):
     assert mock_func.call_count == 1
     # Verify that sleep was never called (no retries)
     assert mock_sleep.call_count == 0
+
+
+def test_url_retrieve_downloads_image():
+    """
+    Embedding functions like open-clip, siglip, and jinaai use url_retrieve()
+    to download images from HTTP URLs. For example, open_clip._to_pil() calls:
+
+        PIL_Image.open(io.BytesIO(url_retrieve(image)))
+
+    Verify that url_retrieve() can download an image and open it as PIL Image,
+    matching the real usage pattern in embedding functions.
+    """
+    import io
+    from PIL import Image
+    from lancedb.embeddings.utils import url_retrieve
+
+    image_url = "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg"
+    image_bytes = url_retrieve(image_url)
+    img = Image.open(io.BytesIO(image_bytes))
+    assert img.size[0] > 0 and img.size[1] > 0