83a7c7
From 84920f898315d09a57a3f1067433eaeb7de5e830 Mon Sep 17 00:00:00 2001
83a7c7
Message-Id: <84920f898315d09a57a3f1067433eaeb7de5e830.1554884444.git.pmatilai@redhat.com>
83a7c7
From: Panu Matilainen <pmatilai@redhat.com>
83a7c7
Date: Fri, 22 Feb 2019 19:44:16 +0200
83a7c7
Subject: [PATCH] In Python 3, return all our string data as surrogate-escaped
83a7c7
 utf-8 strings
83a7c7
83a7c7
In the almost ten years of rpm sort of supporting Python 3 bindings, quite
83a7c7
obviously nobody has actually tried to use them. There's a major mismatch
83a7c7
between what the header API outputs (bytes) and what all the other APIs
83a7c7
accept (strings), resulting in hysterical TypeErrors all over the place,
83a7c7
including but not limited to labelCompare() (RhBug:1631292). Also a huge
83a7c7
number of other places have been returning strings and silently assuming
83a7c7
utf-8 through use of Py_BuildValue("s", ...), which will just irrevocably
83a7c7
fail when non-utf8 data is encountered.
83a7c7
83a7c7
The politically Python 3-correct solution would be declaring all our data
83a7c7
as bytes with unspecified encoding - that's exactly what it historically is.
83a7c7
However doing so would by definition break every single rpm script people
83a7c7
have developed on Python 2. And when 99% of the rpm content in the world
83a7c7
actually is utf-8 encoded even if it doesn't say so (and in recent times
83a7c7
packages even advertise themselves as utf-8 encoded), the bytes-only route
83a7c7
seems a wee bit too draconian, even to this grumpy old fella.
83a7c7
83a7c7
Instead, route all our string returns through a single helper macro
83a7c7
which on Python 2 just does what we always did, but in Python 3 converts
83a7c7
the data to surrogate-escaped utf-8 strings. This makes stuff "just work"
83a7c7
out of the box pretty much everywhere even with Python 3 (including
83a7c7
our own test-suite!), while still allowing the non-utf8 case to be handled.
83a7c7
Handling the non-utf8 case is a bit uglier but still possible,
83a7c7
which is exactly how you want corner-cases to be. There might be some
83a7c7
uses for retrieving raw byte data from the header, but worrying about
83a7c7
such an API is a case for some other rainy day, for now we mostly only
83a7c7
care that stuff works again.
83a7c7
83a7c7
Also add test-cases for mixed data source labelCompare() and
83a7c7
non-utf8 insert to + retrieve from header.
83a7c7
---
83a7c7
 python/header-py.c     |  2 +-
83a7c7
 python/rpmds-py.c      |  8 ++++----
83a7c7
 python/rpmfd-py.c      |  6 +++---
83a7c7
 python/rpmfi-py.c      | 24 ++++++++++++------------
83a7c7
 python/rpmfiles-py.c   | 26 +++++++++++++-------------
83a7c7
 python/rpmkeyring-py.c |  2 +-
83a7c7
 python/rpmmacro-py.c   |  2 +-
83a7c7
 python/rpmmodule.c     |  2 +-
83a7c7
 python/rpmps-py.c      |  8 ++++----
83a7c7
 python/rpmstrpool-py.c |  2 +-
83a7c7
 python/rpmsystem-py.h  |  7 +++++++
83a7c7
 python/rpmtd-py.c      |  2 +-
83a7c7
 python/rpmte-py.c      | 16 ++++++++--------
83a7c7
 python/rpmts-py.c      | 11 ++++++-----
83a7c7
 python/spec-py.c       |  8 ++++----
83a7c7
 tests/local.at         |  1 +
83a7c7
 tests/rpmpython.at     | 34 ++++++++++++++++++++++++++++++++++
83a7c7
 17 files changed, 102 insertions(+), 59 deletions(-)
83a7c7
83a7c7
diff --git a/python/header-py.c b/python/header-py.c
83a7c7
index c9d54e869..93c241cb7 100644
83a7c7
--- a/python/header-py.c
83a7c7
+++ b/python/header-py.c
83a7c7
@@ -231,7 +231,7 @@ static PyObject * hdrFormat(hdrObject * s, PyObject * args, PyObject * kwds)
83a7c7
 	return NULL;
83a7c7
     }
83a7c7
 
83a7c7
-    result = Py_BuildValue("s", r);
83a7c7
+    result = utf8FromString(r);
83a7c7
     free(r);
83a7c7
 
83a7c7
     return result;
83a7c7
diff --git a/python/rpmds-py.c b/python/rpmds-py.c
83a7c7
index 39b26628e..ecc9af9d5 100644
83a7c7
--- a/python/rpmds-py.c
83a7c7
+++ b/python/rpmds-py.c
83a7c7
@@ -31,19 +31,19 @@ rpmds_Ix(rpmdsObject * s)
83a7c7
 static PyObject *
83a7c7
 rpmds_DNEVR(rpmdsObject * s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmdsDNEVR(s->ds));
83a7c7
+    return utf8FromString(rpmdsDNEVR(s->ds));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmds_N(rpmdsObject * s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmdsN(s->ds));
83a7c7
+    return utf8FromString(rpmdsN(s->ds));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmds_EVR(rpmdsObject * s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmdsEVR(s->ds));
83a7c7
+    return utf8FromString(rpmdsEVR(s->ds));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
@@ -261,7 +261,7 @@ rpmds_subscript(rpmdsObject * s, PyObject * key)
83a7c7
 
83a7c7
     ix = (int) PyInt_AsLong(key);
83a7c7
     rpmdsSetIx(s->ds, ix);
83a7c7
-    return Py_BuildValue("s", rpmdsDNEVR(s->ds));
83a7c7
+    return utf8FromString(rpmdsDNEVR(s->ds));
83a7c7
 }
83a7c7
 
83a7c7
 static PyMappingMethods rpmds_as_mapping = {
83a7c7
diff --git a/python/rpmfd-py.c b/python/rpmfd-py.c
83a7c7
index 85fb0cd24..4b05cce5f 100644
83a7c7
--- a/python/rpmfd-py.c
83a7c7
+++ b/python/rpmfd-py.c
83a7c7
@@ -327,17 +327,17 @@ static PyObject *rpmfd_get_closed(rpmfdObject *s)
83a7c7
 static PyObject *rpmfd_get_name(rpmfdObject *s)
83a7c7
 {
83a7c7
     /* XXX: rpm returns non-paths with [mumble], python files use <mumble> */
83a7c7
-    return Py_BuildValue("s", Fdescr(s->fd));
83a7c7
+    return utf8FromString(Fdescr(s->fd));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfd_get_mode(rpmfdObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", s->mode);
83a7c7
+    return utf8FromString(s->mode);
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfd_get_flags(rpmfdObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", s->flags);
83a7c7
+    return utf8FromString(s->flags);
83a7c7
 }
83a7c7
 
83a7c7
 static PyGetSetDef rpmfd_getseters[] = {
83a7c7
diff --git a/python/rpmfi-py.c b/python/rpmfi-py.c
83a7c7
index 8d2f926d0..db405c231 100644
83a7c7
--- a/python/rpmfi-py.c
83a7c7
+++ b/python/rpmfi-py.c
83a7c7
@@ -41,19 +41,19 @@ rpmfi_DX(rpmfiObject * s, PyObject * unused)
83a7c7
 static PyObject *
83a7c7
 rpmfi_BN(rpmfiObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfiBN(s->fi));
83a7c7
+    return utf8FromString(rpmfiBN(s->fi));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmfi_DN(rpmfiObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfiDN(s->fi));
83a7c7
+    return utf8FromString(rpmfiDN(s->fi));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmfi_FN(rpmfiObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfiFN(s->fi));
83a7c7
+    return utf8FromString(rpmfiFN(s->fi));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
@@ -98,7 +98,7 @@ rpmfi_Digest(rpmfiObject * s, PyObject * unused)
83a7c7
 {
83a7c7
     char *digest = rpmfiFDigestHex(s->fi, NULL);
83a7c7
     if (digest) {
83a7c7
-	PyObject *dig = Py_BuildValue("s", digest);
83a7c7
+	PyObject *dig = utf8FromString(digest);
83a7c7
 	free(digest);
83a7c7
 	return dig;
83a7c7
     } else {
83a7c7
@@ -109,7 +109,7 @@ rpmfi_Digest(rpmfiObject * s, PyObject * unused)
83a7c7
 static PyObject *
83a7c7
 rpmfi_FLink(rpmfiObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfiFLink(s->fi));
83a7c7
+    return utf8FromString(rpmfiFLink(s->fi));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
@@ -133,13 +133,13 @@ rpmfi_FMtime(rpmfiObject * s, PyObject * unused)
83a7c7
 static PyObject *
83a7c7
 rpmfi_FUser(rpmfiObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfiFUser(s->fi));
83a7c7
+    return utf8FromString(rpmfiFUser(s->fi));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmfi_FGroup(rpmfiObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfiFGroup(s->fi));
83a7c7
+    return utf8FromString(rpmfiFGroup(s->fi));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
@@ -155,7 +155,7 @@ rpmfi_FClass(rpmfiObject * s, PyObject * unused)
83a7c7
 
83a7c7
     if ((FClass = rpmfiFClass(s->fi)) == NULL)
83a7c7
 	FClass = "";
83a7c7
-    return Py_BuildValue("s", FClass);
83a7c7
+    return utf8FromString(FClass);
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
@@ -208,7 +208,7 @@ rpmfi_iternext(rpmfiObject * s)
83a7c7
 	    Py_INCREF(Py_None);
83a7c7
 	    PyTuple_SET_ITEM(result, 0, Py_None);
83a7c7
 	} else
83a7c7
-	    PyTuple_SET_ITEM(result,  0, Py_BuildValue("s", FN));
83a7c7
+	    PyTuple_SET_ITEM(result,  0, utf8FromString(FN));
83a7c7
 	PyTuple_SET_ITEM(result,  1, PyLong_FromLongLong(FSize));
83a7c7
 	PyTuple_SET_ITEM(result,  2, PyInt_FromLong(FMode));
83a7c7
 	PyTuple_SET_ITEM(result,  3, PyInt_FromLong(FMtime));
83a7c7
@@ -222,12 +222,12 @@ rpmfi_iternext(rpmfiObject * s)
83a7c7
 	    Py_INCREF(Py_None);
83a7c7
 	    PyTuple_SET_ITEM(result, 10, Py_None);
83a7c7
 	} else
83a7c7
-	    PyTuple_SET_ITEM(result, 10, Py_BuildValue("s", FUser));
83a7c7
+	    PyTuple_SET_ITEM(result, 10, utf8FromString(FUser));
83a7c7
 	if (FGroup == NULL) {
83a7c7
 	    Py_INCREF(Py_None);
83a7c7
 	    PyTuple_SET_ITEM(result, 11, Py_None);
83a7c7
 	} else
83a7c7
-	    PyTuple_SET_ITEM(result, 11, Py_BuildValue("s", FGroup));
83a7c7
+	    PyTuple_SET_ITEM(result, 11, utf8FromString(FGroup));
83a7c7
 	PyTuple_SET_ITEM(result, 12, rpmfi_Digest(s, NULL));
83a7c7
 
83a7c7
     } else
83a7c7
@@ -313,7 +313,7 @@ rpmfi_subscript(rpmfiObject * s, PyObject * key)
83a7c7
 
83a7c7
     ix = (int) PyInt_AsLong(key);
83a7c7
     rpmfiSetFX(s->fi, ix);
83a7c7
-    return Py_BuildValue("s", rpmfiFN(s->fi));
83a7c7
+    return utf8FromString(rpmfiFN(s->fi));
83a7c7
 }
83a7c7
 
83a7c7
 static PyMappingMethods rpmfi_as_mapping = {
83a7c7
diff --git a/python/rpmfiles-py.c b/python/rpmfiles-py.c
83a7c7
index bc07dbeaf..557246cae 100644
83a7c7
--- a/python/rpmfiles-py.c
83a7c7
+++ b/python/rpmfiles-py.c
83a7c7
@@ -41,37 +41,37 @@ static PyObject *rpmfile_dx(rpmfileObject *s)
83a7c7
 static PyObject *rpmfile_name(rpmfileObject *s)
83a7c7
 {
83a7c7
     char * fn = rpmfilesFN(s->files, s->ix);
83a7c7
-    PyObject *o = Py_BuildValue("s", fn);
83a7c7
+    PyObject *o = utf8FromString(fn);
83a7c7
     free(fn);
83a7c7
     return o;
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_basename(rpmfileObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfilesBN(s->files, s->ix));
83a7c7
+    return utf8FromString(rpmfilesBN(s->files, s->ix));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_dirname(rpmfileObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfilesDN(s->files, rpmfilesDI(s->files, s->ix)));
83a7c7
+    return utf8FromString(rpmfilesDN(s->files, rpmfilesDI(s->files, s->ix)));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_orig_name(rpmfileObject *s)
83a7c7
 {
83a7c7
     char * fn = rpmfilesOFN(s->files, s->ix);
83a7c7
-    PyObject *o = Py_BuildValue("s", fn);
83a7c7
+    PyObject *o = utf8FromString(fn);
83a7c7
     free(fn);
83a7c7
     return o;
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_orig_basename(rpmfileObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfilesOBN(s->files, s->ix));
83a7c7
+    return utf8FromString(rpmfilesOBN(s->files, s->ix));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_orig_dirname(rpmfileObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfilesODN(s->files, rpmfilesODI(s->files, s->ix)));
83a7c7
+    return utf8FromString(rpmfilesODN(s->files, rpmfilesODI(s->files, s->ix)));
83a7c7
 }
83a7c7
 static PyObject *rpmfile_mode(rpmfileObject *s)
83a7c7
 {
83a7c7
@@ -105,17 +105,17 @@ static PyObject *rpmfile_nlink(rpmfileObject *s)
83a7c7
 
83a7c7
 static PyObject *rpmfile_linkto(rpmfileObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfilesFLink(s->files, s->ix));
83a7c7
+    return utf8FromString(rpmfilesFLink(s->files, s->ix));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_user(rpmfileObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfilesFUser(s->files, s->ix));
83a7c7
+    return utf8FromString(rpmfilesFUser(s->files, s->ix));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_group(rpmfileObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfilesFGroup(s->files, s->ix));
83a7c7
+    return utf8FromString(rpmfilesFGroup(s->files, s->ix));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_fflags(rpmfileObject *s)
83a7c7
@@ -145,7 +145,7 @@ static PyObject *rpmfile_digest(rpmfileObject *s)
83a7c7
 						  NULL, &diglen);
83a7c7
     if (digest) {
83a7c7
 	char * hex = pgpHexStr(digest, diglen);
83a7c7
-	PyObject *o = Py_BuildValue("s", hex);
83a7c7
+	PyObject *o = utf8FromString(hex);
83a7c7
 	free(hex);
83a7c7
 	return o;
83a7c7
     }
83a7c7
@@ -154,17 +154,17 @@ static PyObject *rpmfile_digest(rpmfileObject *s)
83a7c7
 
83a7c7
 static PyObject *rpmfile_class(rpmfileObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfilesFClass(s->files, s->ix));
83a7c7
+    return utf8FromString(rpmfilesFClass(s->files, s->ix));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_caps(rpmfileObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfilesFCaps(s->files, s->ix));
83a7c7
+    return utf8FromString(rpmfilesFCaps(s->files, s->ix));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_langs(rpmfileObject *s)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmfilesFLangs(s->files, s->ix));
83a7c7
+    return utf8FromString(rpmfilesFLangs(s->files, s->ix));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmfile_links(rpmfileObject *s)
83a7c7
diff --git a/python/rpmkeyring-py.c b/python/rpmkeyring-py.c
83a7c7
index d5f131e42..8968e0513 100644
83a7c7
--- a/python/rpmkeyring-py.c
83a7c7
+++ b/python/rpmkeyring-py.c
83a7c7
@@ -38,7 +38,7 @@ static PyObject *rpmPubkey_new(PyTypeObject *subtype,
83a7c7
 static PyObject * rpmPubkey_Base64(rpmPubkeyObject *s)
83a7c7
 {
83a7c7
     char *b64 = rpmPubkeyBase64(s->pubkey);
83a7c7
-    PyObject *res = Py_BuildValue("s", b64);
83a7c7
+    PyObject *res = utf8FromString(b64);
83a7c7
     free(b64);
83a7c7
     return res;
83a7c7
 }
83a7c7
diff --git a/python/rpmmacro-py.c b/python/rpmmacro-py.c
83a7c7
index 3cb1a51f5..d8a365547 100644
83a7c7
--- a/python/rpmmacro-py.c
83a7c7
+++ b/python/rpmmacro-py.c
83a7c7
@@ -52,7 +52,7 @@ rpmmacro_ExpandMacro(PyObject * self, PyObject * args, PyObject * kwds)
83a7c7
 	if (rpmExpandMacros(NULL, macro, &str, 0) < 0)
83a7c7
 	    PyErr_SetString(pyrpmError, "error expanding macro");
83a7c7
 	else
83a7c7
-	    res = Py_BuildValue("s", str);
83a7c7
+	    res = utf8FromString(str);
83a7c7
 	free(str);
83a7c7
     }
83a7c7
     return res;
83a7c7
diff --git a/python/rpmmodule.c b/python/rpmmodule.c
83a7c7
index 3faad23c7..05032edc7 100644
83a7c7
--- a/python/rpmmodule.c
83a7c7
+++ b/python/rpmmodule.c
83a7c7
@@ -237,7 +237,7 @@ static void addRpmTags(PyObject *module)
83a7c7
 
83a7c7
 	PyModule_AddIntConstant(module, tagname, tagval);
83a7c7
 	pyval = PyInt_FromLong(tagval);
83a7c7
-	pyname = Py_BuildValue("s", shortname);
83a7c7
+	pyname = utf8FromString(shortname);
83a7c7
 	PyDict_SetItem(dict, pyval, pyname);
83a7c7
 	Py_DECREF(pyval);
83a7c7
 	Py_DECREF(pyname);
83a7c7
diff --git a/python/rpmps-py.c b/python/rpmps-py.c
83a7c7
index bdc899a60..902b2ae63 100644
83a7c7
--- a/python/rpmps-py.c
83a7c7
+++ b/python/rpmps-py.c
83a7c7
@@ -18,12 +18,12 @@ static PyObject *rpmprob_get_type(rpmProblemObject *s, void *closure)
83a7c7
 
83a7c7
 static PyObject *rpmprob_get_pkgnevr(rpmProblemObject *s, void *closure)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmProblemGetPkgNEVR(s->prob));
83a7c7
+    return utf8FromString(rpmProblemGetPkgNEVR(s->prob));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmprob_get_altnevr(rpmProblemObject *s, void *closure)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmProblemGetAltNEVR(s->prob));
83a7c7
+    return utf8FromString(rpmProblemGetAltNEVR(s->prob));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmprob_get_key(rpmProblemObject *s, void *closure)
83a7c7
@@ -38,7 +38,7 @@ static PyObject *rpmprob_get_key(rpmProblemObject *s, void *closure)
83a7c7
 
83a7c7
 static PyObject *rpmprob_get_str(rpmProblemObject *s, void *closure)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmProblemGetStr(s->prob));
83a7c7
+    return utf8FromString(rpmProblemGetStr(s->prob));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *rpmprob_get_num(rpmProblemObject *s, void *closure)
83a7c7
@@ -59,7 +59,7 @@ static PyGetSetDef rpmprob_getseters[] = {
83a7c7
 static PyObject *rpmprob_str(rpmProblemObject *s)
83a7c7
 {
83a7c7
     char *str = rpmProblemString(s->prob);
83a7c7
-    PyObject *res = Py_BuildValue("s", str);
83a7c7
+    PyObject *res = utf8FromString(str);
83a7c7
     free(str);
83a7c7
     return res;
83a7c7
 }
83a7c7
diff --git a/python/rpmstrpool-py.c b/python/rpmstrpool-py.c
83a7c7
index 356bd1de5..a56e2b540 100644
83a7c7
--- a/python/rpmstrpool-py.c
83a7c7
+++ b/python/rpmstrpool-py.c
83a7c7
@@ -44,7 +44,7 @@ static PyObject *strpool_id2str(rpmstrPoolObject *s, PyObject *item)
83a7c7
 	const char *str = rpmstrPoolStr(s->pool, id);
83a7c7
 
83a7c7
 	if (str)
83a7c7
-	    ret = PyBytes_FromString(str);
83a7c7
+	    ret = utf8FromString(str);
83a7c7
 	else 
83a7c7
 	    PyErr_SetObject(PyExc_KeyError, item);
83a7c7
     }
83a7c7
diff --git a/python/rpmsystem-py.h b/python/rpmsystem-py.h
83a7c7
index 955d60cd3..87c750571 100644
83a7c7
--- a/python/rpmsystem-py.h
83a7c7
+++ b/python/rpmsystem-py.h
83a7c7
@@ -19,4 +19,11 @@
83a7c7
 #define PyInt_AsSsize_t PyLong_AsSsize_t
83a7c7
 #endif
83a7c7
 
83a7c7
+/* In Python 3, we return all strings as surrogate-escaped utf-8 */
83a7c7
+#if PY_MAJOR_VERSION >= 3
83a7c7
+#define utf8FromString(_s) PyUnicode_DecodeUTF8(_s, strlen(_s), "surrogateescape")
83a7c7
+#else
83a7c7
+#define utf8FromString(_s) PyBytes_FromString(_s)
83a7c7
+#endif
83a7c7
+
83a7c7
 #endif	/* H_SYSTEM_PYTHON */
83a7c7
diff --git a/python/rpmtd-py.c b/python/rpmtd-py.c
83a7c7
index 247c7502a..23ca10517 100644
83a7c7
--- a/python/rpmtd-py.c
83a7c7
+++ b/python/rpmtd-py.c
83a7c7
@@ -17,7 +17,7 @@ PyObject * rpmtd_ItemAsPyobj(rpmtd td, rpmTagClass tclass)
83a7c7
 
83a7c7
     switch (tclass) {
83a7c7
     case RPM_STRING_CLASS:
83a7c7
-	res = PyBytes_FromString(rpmtdGetString(td));
83a7c7
+	res = utf8FromString(rpmtdGetString(td));
83a7c7
 	break;
83a7c7
     case RPM_NUMERIC_CLASS:
83a7c7
 	res = PyLong_FromLongLong(rpmtdGetNumber(td));
83a7c7
diff --git a/python/rpmte-py.c b/python/rpmte-py.c
83a7c7
index 99ff2f496..2b3745754 100644
83a7c7
--- a/python/rpmte-py.c
83a7c7
+++ b/python/rpmte-py.c
83a7c7
@@ -54,49 +54,49 @@ rpmte_TEType(rpmteObject * s, PyObject * unused)
83a7c7
 static PyObject *
83a7c7
 rpmte_N(rpmteObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmteN(s->te));
83a7c7
+    return utf8FromString(rpmteN(s->te));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmte_E(rpmteObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmteE(s->te));
83a7c7
+    return utf8FromString(rpmteE(s->te));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmte_V(rpmteObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmteV(s->te));
83a7c7
+    return utf8FromString(rpmteV(s->te));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmte_R(rpmteObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmteR(s->te));
83a7c7
+    return utf8FromString(rpmteR(s->te));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmte_A(rpmteObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmteA(s->te));
83a7c7
+    return utf8FromString(rpmteA(s->te));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmte_O(rpmteObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmteO(s->te));
83a7c7
+    return utf8FromString(rpmteO(s->te));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmte_NEVR(rpmteObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmteNEVR(s->te));
83a7c7
+    return utf8FromString(rpmteNEVR(s->te));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
 rpmte_NEVRA(rpmteObject * s, PyObject * unused)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmteNEVRA(s->te));
83a7c7
+    return utf8FromString(rpmteNEVRA(s->te));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
diff --git a/python/rpmts-py.c b/python/rpmts-py.c
83a7c7
index 1ddfc9a1e..96e3bb28e 100644
83a7c7
--- a/python/rpmts-py.c
83a7c7
+++ b/python/rpmts-py.c
83a7c7
@@ -230,8 +230,9 @@ rpmts_SolveCallback(rpmts ts, rpmds ds, const void * data)
83a7c7
 
83a7c7
     PyEval_RestoreThread(cbInfo->_save);
83a7c7
 
83a7c7
-    args = Py_BuildValue("(Oissi)", cbInfo->tso,
83a7c7
-		rpmdsTagN(ds), rpmdsN(ds), rpmdsEVR(ds), rpmdsFlags(ds));
83a7c7
+    args = Py_BuildValue("(OiNNi)", cbInfo->tso,
83a7c7
+		rpmdsTagN(ds), utf8FromString(rpmdsN(ds)),
83a7c7
+		utf8FromString(rpmdsEVR(ds)), rpmdsFlags(ds));
83a7c7
     result = PyEval_CallObject(cbInfo->cb, args);
83a7c7
     Py_DECREF(args);
83a7c7
 
83a7c7
@@ -409,7 +410,7 @@ rpmts_HdrCheck(rpmtsObject * s, PyObject *obj)
83a7c7
     rpmrc = headerCheck(s->ts, uh, uc, &msg);
83a7c7
     Py_END_ALLOW_THREADS;
83a7c7
 
83a7c7
-    return Py_BuildValue("(is)", rpmrc, msg);
83a7c7
+    return Py_BuildValue("(iN)", rpmrc, utf8FromString(msg));
83a7c7
 }
83a7c7
 
83a7c7
 static PyObject *
83a7c7
@@ -500,7 +501,7 @@ rpmtsCallback(const void * hd, const rpmCallbackType what,
83a7c7
     /* Synthesize a python object for callback (if necessary). */
83a7c7
     if (pkgObj == NULL) {
83a7c7
 	if (h) {
83a7c7
-	    pkgObj = Py_BuildValue("s", headerGetString(h, RPMTAG_NAME));
83a7c7
+	    pkgObj = utf8FromString(headerGetString(h, RPMTAG_NAME));
83a7c7
 	} else {
83a7c7
 	    pkgObj = Py_None;
83a7c7
 	    Py_INCREF(pkgObj);
83a7c7
@@ -845,7 +846,7 @@ static PyObject *rpmts_get_tid(rpmtsObject *s, void *closure)
83a7c7
 
83a7c7
 static PyObject *rpmts_get_rootDir(rpmtsObject *s, void *closure)
83a7c7
 {
83a7c7
-    return Py_BuildValue("s", rpmtsRootDir(s->ts));
83a7c7
+    return utf8FromString(rpmtsRootDir(s->ts));
83a7c7
 }
83a7c7
 
83a7c7
 static int rpmts_set_scriptFd(rpmtsObject *s, PyObject *value, void *closure)
83a7c7
diff --git a/python/spec-py.c b/python/spec-py.c
83a7c7
index 4efdbf4bf..70b796531 100644
83a7c7
--- a/python/spec-py.c
83a7c7
+++ b/python/spec-py.c
83a7c7
@@ -57,7 +57,7 @@ static PyObject *pkgGetSection(rpmSpecPkg pkg, int section)
83a7c7
 {
83a7c7
     char *sect = rpmSpecPkgGetSection(pkg, section);
83a7c7
     if (sect != NULL) {
83a7c7
-        PyObject *ps = PyBytes_FromString(sect);
83a7c7
+        PyObject *ps = utf8FromString(sect);
83a7c7
         free(sect);
83a7c7
         if (ps != NULL)
83a7c7
             return ps;
83a7c7
@@ -158,7 +158,7 @@ static PyObject * getSection(rpmSpec spec, int section)
83a7c7
 {
83a7c7
     const char *sect = rpmSpecGetSection(spec, section);
83a7c7
     if (sect) {
83a7c7
-	return Py_BuildValue("s", sect);
83a7c7
+	return utf8FromString(sect);
83a7c7
     }
83a7c7
     Py_RETURN_NONE;
83a7c7
 }
83a7c7
@@ -208,8 +208,8 @@ static PyObject * spec_get_sources(specObject *s, void *closure)
83a7c7
 
83a7c7
     rpmSpecSrcIter iter = rpmSpecSrcIterInit(s->spec);
83a7c7
     while ((source = rpmSpecSrcIterNext(iter)) != NULL) {
83a7c7
-	PyObject *srcUrl = Py_BuildValue("(sii)",
83a7c7
-				rpmSpecSrcFilename(source, 1),
83a7c7
+	PyObject *srcUrl = Py_BuildValue("(Nii)",
83a7c7
+				utf8FromString(rpmSpecSrcFilename(source, 1)),
83a7c7
 				rpmSpecSrcNum(source),
83a7c7
 				rpmSpecSrcFlags(source)); 
83a7c7
         if (!srcUrl) {
83a7c7
diff --git a/tests/local.at b/tests/local.at
83a7c7
index 02ead66c9..42eef1c75 100644
83a7c7
--- a/tests/local.at
83a7c7
+++ b/tests/local.at
83a7c7
@@ -10,6 +10,7 @@ rm -rf "${abs_builddir}"/testing`rpm --eval '%_dbpath'`/*
83a7c7
 
83a7c7
 m4_define([RPMPY_RUN],[[
83a7c7
 cat << EOF > test.py
83a7c7
+# coding=utf-8
83a7c7
 import rpm, sys
83a7c7
 dbpath=rpm.expandMacro('%_dbpath')
83a7c7
 rpm.addMacro('_dbpath', '${abs_builddir}/testing%s' % dbpath)
83a7c7
diff --git a/tests/rpmpython.at b/tests/rpmpython.at
83a7c7
index ff77f868c..58f3e84a6 100644
83a7c7
--- a/tests/rpmpython.at
83a7c7
+++ b/tests/rpmpython.at
83a7c7
@@ -106,6 +106,25 @@ None
83a7c7
 'rpm.hdr' object has no attribute '__foo__']
83a7c7
 )
83a7c7
 
83a7c7
+RPMPY_TEST([non-utf8 data in header],[
83a7c7
+str = u'älämölö'
83a7c7
+enc = 'iso-8859-1'
83a7c7
+b = str.encode(enc)
83a7c7
+h = rpm.hdr()
83a7c7
+h['group'] = b
83a7c7
+d = h['group']
83a7c7
+try:
83a7c7
+    # python 3
83a7c7
+    t = bytes(d, 'utf-8', 'surrogateescape')
83a7c7
+except TypeError:
83a7c7
+    # python 2
83a7c7
+    t = bytes(d)
83a7c7
+res = t.decode(enc)
83a7c7
+myprint(str == res)
83a7c7
+],
83a7c7
+[True]
83a7c7
+)
83a7c7
+
83a7c7
 RPMPY_TEST([invalid header data],[
83a7c7
 h1 = rpm.hdr()
83a7c7
 h1['basenames'] = ['bing', 'bang', 'bong']
83a7c7
@@ -125,6 +144,21 @@ for h in [h1, h2]:
83a7c7
 /opt/bing,/opt/bang,/flopt/bong]
83a7c7
 )
83a7c7
 
83a7c7
+RPMPY_TEST([labelCompare],[
83a7c7
+v = '1.0'
83a7c7
+r = '1'
83a7c7
+e = 3
83a7c7
+h = rpm.hdr()
83a7c7
+h['name'] = 'testpkg'
83a7c7
+h['version'] = v
83a7c7
+h['release'] = r
83a7c7
+h['epoch'] = e
83a7c7
+myprint(rpm.labelCompare((str(h['epoch']), h['version'], h['release']),
83a7c7
+			 (str(e), v, r)))
83a7c7
+],
83a7c7
+[0]
83a7c7
+)
83a7c7
+
83a7c7
 RPMPY_TEST([vfyflags API],[
83a7c7
 ts = rpm.ts()
83a7c7
 dlv = ts.getVfyFlags()
83a7c7
-- 
83a7c7
2.20.1
83a7c7