diff --git a/coreutils-i18n-un-expand-BOM.patch b/coreutils-i18n-un-expand-BOM.patch new file mode 100644 index 0000000..44769c6 --- /dev/null +++ b/coreutils-i18n-un-expand-BOM.patch @@ -0,0 +1,443 @@ +diff -up ./src/expand-core.c.orig ./src/expand-core.c +--- ./src/expand-core.c.orig 2016-06-28 14:44:18.281619000 +0200 ++++ ./src/expand-core.c 2016-06-30 11:46:50.025109755 +0200 +@@ -18,6 +18,7 @@ + + #include + #include ++#include + + #include "system.h" + #include "error.h" +@@ -27,6 +28,119 @@ + + #include "expand-core.h" + ++extern inline int ++set_utf_locale (void) ++{ ++ /*try using some predefined locale */ ++ const char* predef_locales[] = {"C.UTF8","en_US.UTF8","en_GB.UTF8"}; ++ ++ const int predef_locales_count=3; ++ for (int i=0;ibufcount=0; ++ if (c == 0xEF) ++ { ++ c=fgetc(fp); ++ } ++ else ++ { ++ if (c != EOF) ++ { ++ ungetc(c,fp); ++ } ++ return false; ++ } ++ ++ if (c == 0xBB) ++ { ++ c=fgetc(fp); ++ } ++ else ++ { ++ if ( c!= EOF ) ++ { ++ mbf->buf[0]=(unsigned char) 0xEF; ++ mbf->bufcount=1; ++ ungetc(c,fp); ++ return false; ++ } ++ else ++ { ++ ungetc(0xEF,fp); ++ return false; ++ } ++ } ++ if (c == 0xBF) ++ { ++ mbf->bufcount=0; ++ return true; ++ } ++ else ++ { ++ if (c != EOF) ++ { ++ mbf->buf[0]=(unsigned char) 0xEF; ++ mbf->buf[1]=(unsigned char) 0xBB; ++ mbf->bufcount=2; ++ ungetc(c,fp); ++ return false; ++ } ++ else ++ { ++ mbf->buf[0]=(unsigned char) 0xEF; ++ mbf->bufcount=1; ++ ungetc(0xBB,fp); ++ return false; ++ } ++ } ++ return false; ++} ++ ++extern inline void ++print_bom(void) ++{ ++ putc (0xEF, stdout); ++ putc (0xBB, stdout); ++ putc (0xBF, stdout); ++} ++ + /* Add the comma or blank separated list of tab stops STOPS + to the list of tab stops. */ + +diff -up ./src/expand-core.h.orig ./src/expand-core.h +--- ./src/expand-core.h.orig 2016-06-28 14:44:18.281619000 +0200 ++++ ./src/expand-core.h 2016-06-30 11:47:18.929437205 +0200 +@@ -15,7 +15,7 @@ + along with this program. If not, see . 
*/ + + #ifndef EXPAND_CORE_H_ +-# define EXPAND_CORE_H_ ++#define EXPAND_CORE_H_ + + extern size_t first_free_tab; + +@@ -29,6 +29,18 @@ extern char **file_list; + + extern bool have_read_stdin; + ++inline int ++set_utf_locale (void); ++ ++bool ++check_utf_locale(void); ++ ++bool ++check_bom(FILE* fp, mb_file_t *mbf); ++ ++inline void ++print_bom(void); ++ + void + parse_tab_stops (char const *stops, void (*add_tab_stop)(uintmax_t)); + +diff -up ./src/expand.c.orig ./src/expand.c +--- ./src/expand.c.orig 2016-06-28 14:44:18.286619000 +0200 ++++ ./src/expand.c 2016-06-30 11:50:15.077312947 +0200 +@@ -149,11 +149,33 @@ expand (void) + FILE *fp = next_file (NULL); + mb_file_t mbf; + mbf_char_t c; +- ++ /* True if the starting locale is utf8. */ ++ bool using_utf_locale; ++ ++ /* True if the first file contains BOM header. */ ++ bool found_bom; ++ using_utf_locale=check_utf_locale(); ++ + if (!fp) + return; +- + mbf_init (mbf, fp); ++ found_bom=check_bom(fp,&mbf); ++ ++ if (using_utf_locale == false && found_bom == true) ++ { ++ /*try using some predefined locale */ ++ ++ if (set_utf_locale () != 0) ++ { ++ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale")); ++ } ++ } ++ ++ ++ if (found_bom == true) ++ { ++ print_bom(); ++ } + + while (true) + { +@@ -178,6 +200,27 @@ expand (void) + if ((mb_iseof (c)) && (fp = next_file (fp))) + { + mbf_init (mbf, fp); ++ if (fp!=NULL) ++ { ++ if (check_bom(fp,&mbf)==true) ++ { ++ /*Not the first file - check BOM header*/ ++ if (using_utf_locale==false && found_bom==false) ++ { ++ /*BOM header in subsequent file but not in the first one. */ ++ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header")); ++ } ++ } ++ else ++ { ++ if(using_utf_locale==false && found_bom==true) ++ { ++ /*First file contained BOM header - locale was switched to UTF; ++ all subsequent files should contain BOM. 
*/ ++ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header")); ++ } ++ } ++ } + continue; + } + else +diff -up ./src/unexpand.c.orig ./src/unexpand.c +--- ./src/unexpand.c.orig 2016-06-28 17:39:22.894259000 +0200 ++++ ./src/unexpand.c 2016-07-07 09:48:07.659924755 +0200 +@@ -172,16 +172,36 @@ unexpand (void) + include characters other than spaces, so the blanks must be + stored, not merely counted. */ + mbf_char_t *pending_blank; ++ /* True if the starting locale is utf8. */ ++ bool using_utf_locale; ++ ++ /* True if the first file contains BOM header. */ ++ bool found_bom; ++ using_utf_locale=check_utf_locale(); + + if (!fp) + return; ++ mbf_init (mbf, fp); ++ found_bom=check_bom(fp,&mbf); + ++ if (using_utf_locale == false && found_bom == true) ++ { ++ /*try using some predefined locale */ ++ ++ if (set_utf_locale () != 0) ++ { ++ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale")); ++ } ++ } + /* The worst case is a non-blank character, then one blank, then a + tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so + allocate MAX_COLUMN_WIDTH bytes to store the blanks. */ + pending_blank = xmalloc (max_column_width * sizeof (mbf_char_t)); + +- mbf_init (mbf, fp); ++ if (found_bom == true) ++ { ++ print_bom(); ++ } + + while (true) + { +@@ -225,6 +245,27 @@ unexpand (void) + if ((mb_iseof (c)) && (fp = next_file (fp))) + { + mbf_init (mbf, fp); ++ if (fp!=NULL) ++ { ++ if (check_bom(fp,&mbf)==true) ++ { ++ /*Not the first file - check BOM header*/ ++ if (using_utf_locale==false && found_bom==false) ++ { ++ /*BOM header in subsequent file but not in the first one. */ ++ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header")); ++ } ++ } ++ else ++ { ++ if(using_utf_locale==false && found_bom==true) ++ { ++ /*First file contained BOM header - locale was switched to UTF; ++ all subsequent files should contain BOM. 
*/ ++ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header")); ++ } ++ } ++ } + continue; + } + else +diff -up ./tests/expand/mb.sh.orig ./tests/expand/mb.sh +--- ./tests/expand/mb.sh.orig 2016-06-28 14:44:18.287619000 +0200 ++++ ./tests/expand/mb.sh 2016-06-30 11:57:10.038407216 +0200 +@@ -109,4 +109,75 @@ äbcdef\xFF | + expand < in > out || fail=1 + compare exp out > /dev/null 2>&1 || fail=1 + ++ ++ ++#BOM header test 1 ++printf "\xEF\xBB\xBF" > in; cat <<\EOF >> in || framework_failure_ ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++EOF ++env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_ ++ ++printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_ ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++ äöü . öüä. ä xx ++EOF ++ ++ ++expand < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++LANG=C expand < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++LC_ALL=C expand < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++ ++printf '\xEF\xBB\xBF' > in1; cat <<\EOF >> in1 || framework_failure_ ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++EOF ++env printf ' äöü\t. öüä. \tä xx\n' >> in1 || framework_failure_ ++ ++ ++printf '\xEF\xBB\xBF' > exp; cat <<\EOF >> exp || framework_failure_ ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++ äöü . öüä. ä xx ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++ äöü . öüä. 
ä xx ++EOF ++ ++expand in1 in1 > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++LANG=C expand in1 in1 > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++LC_ALL=C expand in1 in1 > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ + exit $fail +diff -up ./tests/unexpand/mb.sh.orig ./tests/unexpand/mb.sh +--- ./tests/unexpand/mb.sh.orig 2016-06-28 17:39:22.895259000 +0200 ++++ ./tests/unexpand/mb.sh 2016-07-07 09:55:00.098281917 +0200 +@@ -111,3 +111,62 @@ äbcdef\xFF\t| + + unexpand -a < in > out || fail=1 + compare exp out > /dev/null 2>&1 || fail=1 ++ ++#BOM header test 1 ++printf "\xEF\xBB\xBF" > in; cat <<\EOF >> in || framework_failure_ ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++ äöü . öüä. ä xx ++EOF ++env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_ ++ ++printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_ ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++ äöü . öüä. ä xx ++EOF ++ ++unexpand < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++LANG=C unexpand < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++LC_ALL=C unexpand < in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++ ++printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_ ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++ äöü . öüä. ä xx ++1234567812345678123456781 ++. . . . ++a b c d ++. . . . ++ä ö ü ß ++. . . . ++ äöü . öüä. 
ä xx ++EOF ++ ++ ++unexpand in in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++LANG=C unexpand in in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 ++ ++LC_ALL=C unexpand in in > out || fail=1 ++compare exp out > /dev/null 2>&1 || fail=1 diff --git a/coreutils.spec b/coreutils.spec index 4abace5..21ec798 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,7 +1,7 @@ Summary: A set of basic GNU tools commonly used in shell scripts Name: coreutils Version: 8.25 -Release: 10%{?dist} +Release: 11%{?dist} License: GPLv3+ Group: System Environment/Base Url: http://www.gnu.org/software/coreutils/ @@ -50,6 +50,8 @@ Patch804: coreutils-i18n-cut-old.patch Patch803: coreutils-i18n-fix-unexpand.patch #(un)expand - allow multiple files on input - broken by patch 801 Patch805: coreutils-i18n-fix2-expand-unexpand.patch +#(un)expand - test BOM headers +Patch806: coreutils-i18n-un-expand-BOM.patch #getgrouplist() patch from Ulrich Drepper. Patch908: coreutils-getgrouplist.patch @@ -192,6 +194,7 @@ tee DIR_COLORS{,.256color,.lightbgcolor} < src/dircolors.hin %patch803 -p1 -b .i18n-fix-expand %patch804 -p1 -b .i18n-cutold %patch805 -p1 -b .i18n-fix2-expand-unexpand +%patch806 -p1 -b .i18n-BOM-expand-unexpand # Coreutils %patch908 -p1 -b .getgrouplist @@ -351,6 +354,12 @@ fi %license COPYING %changelog +* Thu Jul 07 2016 Jakub Martisko - 8.25-11 +- switch to UTF8 locale when (un)expand input contains BOM header + (#1158494) +- fixed regression where (un)expand would end with "long input line" + error when BOM header is present + * Fri Jun 24 2016 Ondrej Vasik - 8.25-10 - change way of detection of interactive shell in colorls.sh script (#1321648)