test: string type conversion in pandas 3.0+ (#2928)

In pandas 3.0+, string columns now convert to Arrow large_utf8. This PR mainly
makes sure our tests account for the difference across pandas
versions when constructing the expected schema.
This commit is contained in:
Jack Ye
2026-01-21 13:40:48 -08:00
committed by GitHub
parent 4e65748abf
commit f124c9d8d2
3 changed files with 38 additions and 7 deletions

View File

@@ -528,12 +528,19 @@ def test_sanitize_data(
else:
expected_schema = schema
else:
from conftest import pandas_string_type
# polars uses large_string, pandas 3.0+ uses large_string, others use string
if isinstance(data, pl.DataFrame):
text_type = pa.large_utf8()
elif isinstance(data, pd.DataFrame):
text_type = pandas_string_type()
else:
text_type = pa.string()
expected_schema = pa.schema(
{
"id": pa.int64(),
"text": pa.large_utf8()
if isinstance(data, pl.DataFrame)
else pa.string(),
"text": text_type,
"vector": pa.list_(pa.float32(), 10),
}
)