Files
lancedb/python/python/tests/conftest.py
Jack Ye f124c9d8d2 test: string type conversion in pandas 3.0+ (#2928)
Pandas 3.0+ string now converts to Arrow large_utf8. This PR mainly
makes sure our test accounts for the difference across the pandas
versions when constructing schema.
2026-01-21 13:40:48 -08:00

48 lines
1.2 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from datetime import timedelta
from lancedb.db import AsyncConnection, DBConnection
import lancedb
import pytest
import pytest_asyncio
def pandas_string_type():
"""Return the PyArrow string type that pandas uses for string columns.
pandas 3.0+ uses large_string for string columns, pandas 2.x uses string.
"""
import pandas as pd
import pyarrow as pa
version = tuple(int(x) for x in pd.__version__.split(".")[:2])
if version >= (3, 0):
return pa.large_utf8()
return pa.utf8()
# Use an in-memory database for most tests.
@pytest.fixture
def mem_db() -> DBConnection:
return lancedb.connect("memory://")
# Use a temporary directory when we need to inspect the database files.
@pytest.fixture
def tmp_db(tmp_path) -> DBConnection:
return lancedb.connect(tmp_path)
@pytest_asyncio.fixture
async def mem_db_async() -> AsyncConnection:
return await lancedb.connect_async("memory://")
@pytest_asyncio.fixture
async def tmp_db_async(tmp_path) -> AsyncConnection:
return await lancedb.connect_async(
tmp_path, read_consistency_interval=timedelta(seconds=0)
)