From 311c7a70a37d35cbbdc36f74dd306e4de0b7d78b Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade
Date: Mon, 12 Feb 2024 23:34:02 -0500
Subject: [PATCH 4/6] TST: Fix test_str_encode on big endian machines

I couldn't find a way to specify the endianness when creating the
`ArrowDtype`, so just pick the right result based on native byte order.

Signed-off-by: Elliott Sales de Andrade
---
Reviewer note: the big-endian utf8 expectation is written as a bytes
literal (b"abc") so that both byte-order entries have the same type and
match the pa.binary() dtype used to build the expected Series.

 pandas/tests/extension/test_arrow.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index d9a3033b83..c551fff040 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -26,6 +26,7 @@ from io import (
 import operator
 import pickle
 import re
+import sys
 
 import numpy as np
 import pytest
@@ -2106,14 +2107,21 @@ def test_str_removeprefix(val):
 @pytest.mark.parametrize(
     "encoding, exp",
     [
-        ["utf8", b"abc"],
-        ["utf32", b"\xff\xfe\x00\x00a\x00\x00\x00b\x00\x00\x00c\x00\x00\x00"],
+        ("utf8", {"little": b"abc", "big": b"abc"}),
+        (
+            "utf32",
+            {
+                "little": b"\xff\xfe\x00\x00a\x00\x00\x00b\x00\x00\x00c\x00\x00\x00",
+                "big": b"\x00\x00\xfe\xff\x00\x00\x00a\x00\x00\x00b\x00\x00\x00c",
+            },
+        ),
     ],
+    ids=["utf8", "utf32"],
 )
 def test_str_encode(errors, encoding, exp):
     ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
     result = ser.str.encode(encoding, errors)
-    expected = pd.Series([exp, None], dtype=ArrowDtype(pa.binary()))
+    expected = pd.Series([exp[sys.byteorder], None], dtype=ArrowDtype(pa.binary()))
     tm.assert_series_equal(result, expected)
 
 
-- 
2.43.0