test: string type conversion in pandas 3.0+ (#2928)

Pandas 3.0+ string now converts to Arrow large_utf8. This PR mainly
makes sure our test accounts for the difference across the pandas
versions when constructing schema.
This commit is contained in:
Jack Ye
2026-01-21 13:40:48 -08:00
committed by GitHub
parent 4e65748abf
commit f124c9d8d2
3 changed files with 38 additions and 7 deletions

View File

@@ -2,12 +2,27 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from datetime import timedelta
from lancedb.db import AsyncConnection, DBConnection
import lancedb
import pytest
import pytest_asyncio
def pandas_string_type():
"""Return the PyArrow string type that pandas uses for string columns.
pandas 3.0+ uses large_string for string columns, pandas 2.x uses string.
"""
import pandas as pd
import pyarrow as pa
version = tuple(int(x) for x in pd.__version__.split(".")[:2])
if version >= (3, 0):
return pa.large_utf8()
return pa.utf8()
# Use an in-memory database for most tests.
@pytest.fixture
def mem_db() -> DBConnection: