From 311c7a70a37d35cbbdc36f74dd306e4de0b7d78b Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Mon, 12 Feb 2024 23:34:02 -0500
Subject: [PATCH 4/6] TST: Fix test_str_encode on big endian machines
I couldn't find a way to specify the endianness when creating the
`ArrowDtype`, so just pick the right result based on native byte order.
Signed-off-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
---
pandas/tests/extension/test_arrow.py | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index d9a3033b83..c551fff040 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -26,6 +26,7 @@ from io import (
import operator
import pickle
import re
+import sys
import numpy as np
import pytest
@@ -2106,14 +2107,21 @@ def test_str_removeprefix(val):
@pytest.mark.parametrize(
"encoding, exp",
[
- ["utf8", b"abc"],
- ["utf32", b"\xff\xfe\x00\x00a\x00\x00\x00b\x00\x00\x00c\x00\x00\x00"],
+ ("utf8", {"little": b"abc", "big": b"abc"}),  # UTF-8 has no byte order
+ (
+ "utf32",
+ {
+ "little": b"\xff\xfe\x00\x00a\x00\x00\x00b\x00\x00\x00c\x00\x00\x00",
+ "big": b"\x00\x00\xfe\xff\x00\x00\x00a\x00\x00\x00b\x00\x00\x00c",
+ },
+ ),
],
+ ids=["utf8", "utf32"],
)
def test_str_encode(errors, encoding, exp):
ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
result = ser.str.encode(encoding, errors)
- expected = pd.Series([exp, None], dtype=ArrowDtype(pa.binary()))
+ expected = pd.Series([exp[sys.byteorder], None], dtype=ArrowDtype(pa.binary()))
tm.assert_series_equal(result, expected)
--
2.43.0