mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-14 07:42:58 +00:00
When the region is not specified in the S3 path, `resolve_s3_region` from
the "lance-format" project (see [here][1]) resolves the region by
calling [`resolve_bucket_region`][2], a function from the
"arrow-rs-object-store" project that expects [virtual-hosted-style
URLs][3]. When the bucket name in such a URL contains dots ("."),
automatic region detection breaks. See more details in the issue
description:
https://github.com/lancedb/lancedb/issues/1898#issuecomment-3690142427
This PR adds early validation in connect() and connect_async() to raise a
clear error with instructions when the region is not specified for such
buckets.
[1]:
https://github.com/lance-format/lance/blob/v2.0.0-beta.4/rust/lance-io/src/object_store/providers/aws.rs#L197
[2]:
https://github.com/apache/arrow-rs-object-store/blob/eedbf3d7d8/src/aws/resolve.rs#L52C5-L52C65
[3]:
https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access
Fixes #1898
69 lines
2.3 KiB
Python
69 lines
2.3 KiB
Python
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors

"""
Tests for S3 bucket names containing dots.

Related issue: https://github.com/lancedb/lancedb/issues/1898

These tests validate the early error checking for S3 bucket names with dots.
No actual S3 connection is made - validation happens before connection.
"""
|
|
|
|
import pytest
|
|
import lancedb
|
|
|
|
# S3 URIs used by the test cases in this module. The tuple-valued constants
# pair a URI with the storage options passed alongside it.
BUCKET_WITH_DOTS = "s3://my.bucket.name/path"
BUCKET_WITH_DOTS_AND_REGION = (
    "s3://my.bucket.name",
    {"region": "us-east-1"},
)
BUCKET_WITH_DOTS_AND_AWS_REGION = (
    "s3://my.bucket.name",
    {"aws_region": "us-east-1"},
)
BUCKET_WITHOUT_DOTS = "s3://my-bucket/path"
|
|
|
|
|
|
class TestS3BucketWithDotsSync:
    """Exercise ``lancedb.connect()`` with dotted and non-dotted S3 buckets."""

    def test_bucket_with_dots_requires_region(self):
        """Expect a ``ValueError`` mentioning "contains dots" without a region."""
        expected_failure = pytest.raises(ValueError, match="contains dots")
        with expected_failure:
            lancedb.connect(BUCKET_WITH_DOTS)

    def test_bucket_with_dots_and_region_passes(self):
        """Connect to a dotted bucket with ``region`` in the storage options."""
        location, storage_options = BUCKET_WITH_DOTS_AND_REGION
        connection = lancedb.connect(location, storage_options=storage_options)
        assert connection is not None

    def test_bucket_with_dots_and_aws_region_passes(self):
        """Connect to a dotted bucket with ``aws_region`` in the storage options."""
        location, storage_options = BUCKET_WITH_DOTS_AND_AWS_REGION
        connection = lancedb.connect(location, storage_options=storage_options)
        assert connection is not None

    def test_bucket_without_dots_passes(self):
        """Connect to a bucket without dots, passing no storage options."""
        connection = lancedb.connect(BUCKET_WITHOUT_DOTS)
        assert connection is not None
|
|
|
|
|
|
class TestS3BucketWithDotsAsync:
    """Exercise ``lancedb.connect_async()`` with dotted and non-dotted S3 buckets."""

    @pytest.mark.asyncio
    async def test_bucket_with_dots_requires_region(self):
        """Expect a ``ValueError`` mentioning "contains dots" without a region."""
        expected_failure = pytest.raises(ValueError, match="contains dots")
        with expected_failure:
            await lancedb.connect_async(BUCKET_WITH_DOTS)

    @pytest.mark.asyncio
    async def test_bucket_with_dots_and_region_passes(self):
        """Connect to a dotted bucket with ``region`` in the storage options."""
        location, storage_options = BUCKET_WITH_DOTS_AND_REGION
        connection = await lancedb.connect_async(
            location, storage_options=storage_options
        )
        assert connection is not None

    @pytest.mark.asyncio
    async def test_bucket_with_dots_and_aws_region_passes(self):
        """Connect to a dotted bucket with ``aws_region`` in the storage options."""
        location, storage_options = BUCKET_WITH_DOTS_AND_AWS_REGION
        connection = await lancedb.connect_async(
            location, storage_options=storage_options
        )
        assert connection is not None

    @pytest.mark.asyncio
    async def test_bucket_without_dots_passes(self):
        """Connect to a bucket without dots, passing no storage options."""
        connection = await lancedb.connect_async(BUCKET_WITHOUT_DOTS)
        assert connection is not None
|