diff --git a/coreutils-i18n-cut.patch b/coreutils-i18n-cut.patch new file mode 100644 index 0000000..b300eac --- /dev/null +++ b/coreutils-i18n-cut.patch @@ -0,0 +1,583 @@ +--- coreutils-8.24/src/cut.c 2015-06-26 19:05:22.000000000 +0200 ++++ cut.c 2016-01-15 10:15:04.863804121 +0100 +@@ -28,6 +28,11 @@ + #include + #include + #include ++ ++#include ++#include ++#include ++ + #include "system.h" + + #include "error.h" +@@ -90,25 +95,16 @@ add_range_pair (size_t lo, size_t hi) + ++n_rp; + } + +-/* This buffer is used to support the semantics of the -s option +- (or lack of same) when the specified field list includes (does +- not include) the first field. In both of those cases, the entire +- first field must be read into this buffer to determine whether it +- is followed by a delimiter or a newline before any of it may be +- output. Otherwise, cut_fields can do the job without using this +- buffer. */ +-static char *field_1_buffer; +- +-/* The number of bytes allocated for FIELD_1_BUFFER. */ +-static size_t field_1_bufsize; +- + enum operating_mode + { + undefined_mode, + +- /* Output characters that are in the given bytes. */ ++ /* Output the given bytes. */ + byte_mode, + ++ /* Output characters that are in the given positions. */ ++ char_mode, ++ + /* Output the given delimiter-separated fields. */ + field_mode + }; +@@ -120,12 +116,16 @@ static enum operating_mode operating_mod + with field mode. */ + static bool suppress_non_delimited; + ++/* If true (-n was given), byte mode does not split multibyte characters; ++ each one is handled as a unit. */ ++static bool no_break_mb_chars; ++ + /* If true, print all bytes, characters, or fields _except_ + those that were specified. */ + static bool complement; + + /* The delimiter character for field mode. */ +-static unsigned char delim; ++static mbf_char_t delim; + + /* True if the --output-delimiter=STRING option was specified. 
*/ + static bool output_delimiter_specified; +@@ -135,7 +135,7 @@ static size_t output_delimiter_length; + + /* The output field separator string. Defaults to the 1-character + string consisting of the input delimiter. */ +-static char *output_delimiter_string; ++static char const *output_delimiter_string; + + /* True if we have ever read standard input. */ + static bool have_read_stdin; +@@ -189,7 +189,7 @@ Print selected parts of lines from each + -f, --fields=LIST select only these fields; also print any line\n\ + that contains no delimiter character, unless\n\ + the -s option is specified\n\ +- -n (ignored)\n\ ++ -n with -b, don't split multibyte characters\n\ + "), stdout); + fputs (_("\ + --complement complement the set of selected bytes, characters\n\ +@@ -435,6 +435,12 @@ next_item (size_t *item_idx) + current_rp++; + } + ++static inline void ++next_item_n (size_t *item_idx, size_t n) ++{ ++ while (n-- > 0) ++ next_item (item_idx); ++} + /* Return nonzero if the K'th field or byte is printable. */ + + static inline bool +@@ -443,6 +449,15 @@ print_kth (size_t k) + return current_rp->lo <= k; + } + ++/* LO and HI are the first and last positions occupied by the current ++ * character; return true if either one lies within the current range pair. */ ++static inline bool ++rp_intersect (size_t lo, size_t hi) ++{ ++ return ((current_rp->lo <= lo && current_rp->hi >= lo) ++ || (current_rp->lo <= hi && current_rp->hi >= hi)); ++} ++ + /* Return nonzero if K'th byte is the beginning of a range. */ + + static inline bool +@@ -505,23 +520,216 @@ cut_bytes (FILE *stream) + } + + /* Read from stream STREAM, printing to standard output any selected fields. 
*/ ++extern ssize_t ++mb_getndelim2 (mbf_char_t **lineptr, size_t *linesize, size_t nmax, ++ mbf_char_t delim1, mbf_char_t delim2, mb_file_t *stream) ++{ ++/* The maximum value that getndelim2 can return without suffering from ++ overflow problems, either internally (because of pointer ++ subtraction overflow) or due to the API (because of ssize_t). 
*/ ++#define GETNDELIM2_MAXIMUM (PTRDIFF_MAX < SSIZE_MAX ? PTRDIFF_MAX : SSIZE_MAX) ++ ++/* Try to add at least this many bytes when extending the buffer. ++ MIN_CHUNK must be no greater than GETNDELIM2_MAXIMUM. */ ++#define MIN_CHUNK 64 ++ size_t nchars_avail; /* Allocated but unused chars in *LINEPTR. */ ++ mbf_char_t *read_pos; /* Where we're reading into *LINEPTR. */ ++ ssize_t chars_stored = -1; ++ mbf_char_t *ptr = *lineptr; ++ size_t size = *linesize; ++ bool found_delimiter; ++ ++ if (!ptr) ++ { ++ size = nmax < MIN_CHUNK ? nmax : MIN_CHUNK; ++ ptr = malloc (size * sizeof (mbf_char_t)); ++ if (!ptr) ++ return -1; ++ } ++ ++ if (size < 0) ++ goto done; ++ ++ nchars_avail = size; ++ read_pos = ptr; ++ ++ if (nchars_avail == 0 && nmax <= size) ++ goto done; ++ ++ /* Normalize delimiters, since memchr2 doesn't handle EOF. */ ++ if (mb_iseof (delim1)) ++ mb_copy (&delim1, &delim2); ++ else if (mb_iseof (delim2)) ++ mb_copy (&delim2, &delim1); ++ ++ flockfile (stream->fp); ++ ++ found_delimiter = false; ++ do ++ { ++ /* Here always ptr + size == read_pos + nchars_avail. ++ Also nchars_avail > 0 || size < nmax. */ ++ ++ mbf_char_t c IF_LINT (= 0); ++ { ++ mbf_getc (c, *stream); ++ if (mb_iseof (c)) ++ { ++ /* Return partial line, if any. */ ++ if (read_pos == ptr) ++ goto unlock_done; ++ else ++ break; ++ } ++ if (mb_equal (c, delim1) || mb_equal (c, delim2)) ++ found_delimiter = true; ++ } ++ ++ /* Keep one slot in reserve, as getndelim2 does: grow the buffer ++ once fewer than two slots are free, but never beyond NMAX ++ (characters that no longer fit are read and dropped). */ ++ ++ if (nchars_avail < 2 && size < nmax) ++ { ++ /* Grow size proportionally, not linearly, to avoid O(n^2) ++ running time. */ ++ size_t newsize = size < MIN_CHUNK ? size + MIN_CHUNK : 2 * size; ++ mbf_char_t *newptr; ++ ++ /* Respect nmax. This handles possible integer overflow. */ ++ if (! 
(size < newsize && newsize <= nmax)) ++ newsize = nmax; ++ ++ if (GETNDELIM2_MAXIMUM < newsize) ++ { ++ size_t newsizemax = GETNDELIM2_MAXIMUM + 1; ++ if (size == newsizemax) ++ goto unlock_done; ++ newsize = newsizemax; ++ } ++ nchars_avail = newsize - (read_pos - ptr); ++ newptr = realloc (ptr, newsize * sizeof (mbf_char_t)); ++ if (!newptr) ++ goto unlock_done; ++ ptr = newptr; ++ size = newsize; ++ read_pos = size - nchars_avail + ptr; ++ } ++ ++ /* Here, if size < nmax, nchars_avail >= 2 and C is stored below. ++ If size == nmax, nchars_avail > 0 and C may be dropped. */ ++ ++ if (1 < nchars_avail) ++ { ++ mb_copy(read_pos++, &c); ++ --nchars_avail; ++ } ++ ++ } ++ while (!found_delimiter); ++ ++ chars_stored = (read_pos - ptr); ++ ++ unlock_done: ++ funlockfile (stream->fp); ++ ++ done: ++ *lineptr = ptr; ++ *linesize = size; ++ return chars_stored; ++} ++ ++static void ++cut_chars (FILE *stream) ++{ ++ size_t char_idx; /* Number of chars in the line so far. */ ++ bool print_delimiter; ++ mbf_char_t c; ++ mb_file_t mbf; ++ ++ print_delimiter = false; ++ char_idx = 0; ++ current_rp = rp; ++ ++ mbf_init (mbf, stream); ++ while (true) ++ { ++ mbf_getc (c, mbf); ++ ++ if (mb_iseq (c, '\n')) ++ { ++ putc ('\n', stdout); ++ char_idx = 0; ++ print_delimiter = false; ++ current_rp = rp; ++ } ++ else if (mb_iseof (c)) ++ { ++ if (char_idx > 0) ++ putc ('\n', stdout); ++ break; ++ } ++ else ++ { ++ /* Forward by one position. */ ++ next_item (&char_idx); ++ ++ /* Check whether the first or last position occupied by the current ++ * character (one per byte with -b -n, a single one with -c) is selected. */ ++ if (rp_intersect (char_idx, char_idx + (operating_mode == byte_mode ? 
mb_len (c) - 1 : 0))) ++ { ++ if (output_delimiter_specified) ++ { ++ if (print_delimiter && is_range_start_index (char_idx)) ++ { ++ fwrite (output_delimiter_string, sizeof (char), ++ output_delimiter_length, stdout); ++ } ++ print_delimiter = true; ++ } ++ mb_putc (c, stdout); ++ } ++ ++ /* Byte mode with multibyte characters uncut (-b -n). */ ++ if (operating_mode == byte_mode) ++ /* Forward by an additional byte_length (c) - 1. 
*/ ++ next_item_n (&char_idx, mb_len (c) - 1); ++ } ++ } ++} + + static void + cut_fields (FILE *stream) + { +- int c; ++ ++ /* This buffer is used to support the semantics of the -s option ++ (or lack of same) when the specified field list includes (does ++ not include) the first field. In both of those cases, the entire ++ first field must be read into this buffer to determine whether it ++ is followed by a delimiter or a newline before any of it may be ++ output. Otherwise, cut_fields can do the job without using this ++ buffer. */ ++ static mbf_char_t *field_1_buffer = 0; ++ /* The number of characters allocated for FIELD_1_BUFFER. */ ++ static size_t field_1_bufsize; ++ ++ ++ mbf_char_t c, d; ++ mb_file_t mbf; + size_t field_idx = 1; + bool found_any_selected_field = false; + bool buffer_first_field; + + current_rp = rp; + +- c = getc (stream); +- if (c == EOF) ++ mbf_init (mbf, stream); ++ mbf_getc (c, mbf); ++ if (mb_iseof (c)) + return; + +- ungetc (c, stream); +- c = 0; ++ mbf_ungetc (c, mbf); ++ mb_setascii (&c, 0); ++ mb_copy (&d, &delim); + + /* To support the semantics of the -s flag, we may have to buffer + all of the first field to determine whether it is 'delimited.' +@@ -536,10 +744,14 @@ cut_fields (FILE *stream) + if (field_idx == 1 && buffer_first_field) + { + ssize_t len; +- size_t n_bytes; ++ size_t n_chars; ++ mbf_char_t nl; ++ mb_setascii (&nl, '\n'); ++ ++ len = mb_getndelim2 (&field_1_buffer, &field_1_bufsize, ++ GETNLINE_NO_LIMIT, d, nl, &mbf); ++ + +- len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, +- GETNLINE_NO_LIMIT, delim, '\n', stream); + if (len < 0) + { + free (field_1_buffer); +@@ -549,15 +761,15 @@ cut_fields (FILE *stream) + xalloc_die (); + } + +- n_bytes = len; +- assert (n_bytes != 0); ++ n_chars = len; ++ //assert (n_chars != 0); + +- c = 0; ++ mb_setascii (&c, 0); + + /* If the first field extends to the end of line (it is not + delimited) and we are printing all non-delimited lines, + print this one. 
*/ +- if (to_uchar (field_1_buffer[n_bytes - 1]) != delim) ++ if (!mb_equal (field_1_buffer[n_chars - 1], d)) + { + if (suppress_non_delimited) + { +@@ -565,26 +777,30 @@ cut_fields (FILE *stream) + } + else + { +- fwrite (field_1_buffer, sizeof (char), n_bytes, stdout); ++ for (size_t i = 0; i < n_chars; ++i) ++ mb_putc (field_1_buffer[i], stdout); ++ + /* Make sure the output line is newline terminated. */ +- if (field_1_buffer[n_bytes - 1] != '\n') ++ if (!mb_iseq (field_1_buffer[n_chars - 1], '\n')) + putchar ('\n'); +- c = '\n'; ++ mb_setascii (&c,'\n'); + } + continue; + } + if (print_kth (1)) + { + /* Print the field, but not the trailing delimiter. */ +- fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout); ++ for (size_t i = 0; i < n_chars - 1; ++i) ++ mb_putc (field_1_buffer[i], stdout); + + /* With -d$'\n' don't treat the last '\n' as a delimiter. */ +- if (delim == '\n') ++ if (mb_iseq (d, '\n')) + { +- int last_c = getc (stream); +- if (last_c != EOF) ++ mbf_char_t last_c; ++ mbf_getc (last_c, mbf); ++ if (!mb_iseof (last_c)) + { +- ungetc (last_c, stream); ++ mbf_ungetc (last_c, mbf); + found_any_selected_field = true; + } + } +@@ -594,7 +810,8 @@ cut_fields (FILE *stream) + next_item (&field_idx); + } + +- int prev_c = c; ++ mbf_char_t prev_c; ++ mb_copy (&prev_c, &c); + + if (print_kth (field_idx)) + { +@@ -605,41 +822,46 @@ cut_fields (FILE *stream) + } + found_any_selected_field = true; + +- while ((c = getc (stream)) != delim && c != '\n' && c != EOF) ++ mbf_getc (c, mbf); ++ while (!mb_equal (c, d) && !mb_iseq (c, '\n') && !mb_iseof (c)) + { +- putchar (c); +- prev_c = c; ++ mb_putc (c, stdout); ++ mb_copy (&prev_c, &c); ++ mbf_getc (c, mbf); + } + } + else + { +- while ((c = getc (stream)) != delim && c != '\n' && c != EOF) ++ mbf_getc (c, mbf); ++ while (!mb_equal (c, d) && !mb_iseq (c, '\n') && !mb_iseof (c)) + { +- prev_c = c; ++ mb_copy (&prev_c, &c); ++ mbf_getc (c, mbf); + } + } + + /* With -d$'\n' don't treat the last '\n' as a 
delimiter. */ +- if (delim == '\n' && c == delim) ++ if (mb_iseq (d, '\n') && mb_equal (c, d)) + { +- int last_c = getc (stream); +- if (last_c != EOF) +- ungetc (last_c, stream); ++ mbf_char_t last_c; ++ mbf_getc (last_c, mbf); ++ if (!mb_iseof (last_c)) ++ mbf_ungetc (last_c, mbf); + else +- c = last_c; ++ mb_copy (&c, &last_c); + } + +- if (c == delim) ++ if (mb_equal (c, d)) + next_item (&field_idx); +- else if (c == '\n' || c == EOF) ++ else if (mb_iseq (c, '\n') || mb_iseof (c)) + { + if (found_any_selected_field + || !(suppress_non_delimited && field_idx == 1)) + { +- if (c == '\n' || prev_c != '\n' || delim == '\n') ++ if (mb_iseq (c, '\n') || !mb_iseq (prev_c, '\n') || mb_iseq (d, '\n')) + putchar ('\n'); + } +- if (c == EOF) ++ if (mb_iseof (c)) + break; + field_idx = 1; + current_rp = rp; +@@ -652,7 +874,14 @@ static void + cut_stream (FILE *stream) + { + if (operating_mode == byte_mode) +- cut_bytes (stream); ++ { ++ if (no_break_mb_chars) ++ cut_chars (stream); ++ else ++ cut_bytes (stream); ++ } ++ else if (operating_mode == char_mode) ++ cut_chars (stream); + else + cut_fields (stream); + } +@@ -706,6 +935,7 @@ main (int argc, char **argv) + bool ok; + bool delim_specified = false; + char *spec_list_string IF_LINT ( = NULL); ++ mbi_iterator_t iter; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); +@@ -719,8 +949,10 @@ main (int argc, char **argv) + + /* By default, all non-delimited lines are printed. */ + suppress_non_delimited = false; ++ /* Default behaviour for -b, unless -n is also specified. */ ++ no_break_mb_chars = false; + +- delim = '\0'; ++ mb_setascii (&delim, '\0'); + have_read_stdin = false; + + while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1) +@@ -728,7 +960,6 @@ main (int argc, char **argv) + switch (optc) + { + case 'b': +- case 'c': + /* Build the byte list. 
*/ + if (operating_mode != undefined_mode) + FATAL_ERROR (_("only one type of list may be specified")); +@@ -736,6 +967,14 @@ main (int argc, char **argv) + spec_list_string = optarg; + break; + ++ case 'c': ++ /* Build the character list. */ ++ if (operating_mode != undefined_mode) ++ FATAL_ERROR (_("only one type of list may be specified")); ++ operating_mode = char_mode; ++ spec_list_string = optarg; ++ break; ++ + case 'f': + /* Build the field list. */ + if (operating_mode != undefined_mode) +@@ -747,9 +986,15 @@ main (int argc, char **argv) + case 'd': + /* New delimiter. */ + /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ +- if (optarg[0] != '\0' && optarg[1] != '\0') ++ mbi_init (iter, optarg, strlen (optarg)); ++ if (!mbi_avail (iter)) ++ mb_setascii (&delim, '\0'); ++ else ++ mb_copy (&delim, &mbi_cur (iter)); ++ if (mbi_avail (iter)) ++ mbi_advance (iter); ++ if (mbi_avail (iter)) + FATAL_ERROR (_("the delimiter must be a single character")); +- delim = optarg[0]; + delim_specified = true; + break; + +@@ -763,6 +1008,7 @@ main (int argc, char **argv) + break; + + case 'n': ++ no_break_mb_chars = true; + break; + + case 's': +@@ -802,15 +1048,12 @@ main (int argc, char **argv) + } + + if (!delim_specified) +- delim = '\t'; ++ mb_setascii (&delim, '\t'); + + if (output_delimiter_string == NULL) + { +- static char dummy[2]; +- dummy[0] = delim; +- dummy[1] = '\0'; +- output_delimiter_string = dummy; +- output_delimiter_length = 1; ++ output_delimiter_string = mb_ptr (delim); ++ output_delimiter_length = mb_len (delim); + } + + if (optind == argc) diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch index 5d3a591..e876fe3 100644 --- a/coreutils-i18n.patch +++ b/coreutils-i18n.patch @@ -23,579 +23,6 @@ diff -urNp coreutils-8.24-orig/lib/linebuffer.h coreutils-8.24/lib/linebuffer.h }; /* Initialize linebuffer LINEBUFFER for use. 
*/ -diff -urNp coreutils-8.24-orig/src/cut.c coreutils-8.24/src/cut.c ---- coreutils-8.24-orig/src/cut.c 2015-06-26 19:05:22.000000000 +0200 -+++ coreutils-8.24/src/cut.c 2015-07-05 09:04:33.028546950 +0200 -@@ -28,6 +28,11 @@ - #include - #include - #include -+ -+/* Get mbstate_t, mbrtowc(). */ -+#if HAVE_WCHAR_H -+# include -+#endif - #include "system.h" - - #include "error.h" -@@ -37,6 +42,18 @@ - #include "quote.h" - #include "xstrndup.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# undef MB_LEN_MAX -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - /* The official name of this program (e.g., no 'g' prefix). */ - #define PROGRAM_NAME "cut" - -@@ -53,6 +70,52 @@ - } \ - while (0) - -+/* Refill the buffer BUF to get a multibyte character. */ -+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ -+ do \ -+ { \ -+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ -+ { \ -+ memmove (BUF, BUFPOS, BUFLEN); \ -+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ -+ BUFPOS = BUF; \ -+ } \ -+ } \ -+ while (0) -+ -+/* Get wide character on BUFPOS. BUFPOS is not included after that. -+ If byte sequence is not valid as a character, CONVFAIL is true. Otherwise false. */ -+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ -+ do \ -+ { \ -+ mbstate_t state_bak; \ -+ \ -+ if (BUFLEN < 1) \ -+ { \ -+ WC = WEOF; \ -+ break; \ -+ } \ -+ \ -+ /* Get a wide character. 
*/ \ -+ CONVFAIL = false; \ -+ state_bak = STATE; \ -+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-1: \ -+ case (size_t)-2: \ -+ CONVFAIL = true; \ -+ STATE = state_bak; \ -+ /* Fall througn. */ \ -+ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ break; \ -+ } \ -+ } \ -+ while (0) -+ - - struct range_pair - { -@@ -75,6 +138,8 @@ static size_t n_rp; - /* Number of `struct range_pair's allocated. */ - static size_t n_rp_allocated; - -+/* Length of the delimiter given as argument to -d. */ -+size_t delimlen; - - /* Append LOW, HIGH to the list RP of range pairs, allocating additional - space if necessary. Update global variable N_RP. When allocating, -@@ -106,15 +171,25 @@ enum operating_mode - { - undefined_mode, - -- /* Output characters that are in the given bytes. */ -+ /* Output bytes that are at the given positions. */ - byte_mode, - -+ /* Output characters that are at the given positions. */ -+ character_mode, -+ - /* Output the given delimiter-separated fields. */ - field_mode - }; - - static enum operating_mode operating_mode; - -+/* If nonzero, when in byte mode, don't split multibyte characters. */ -+static int byte_mode_character_aware; -+ -+/* If nonzero, the function for single byte locale is work -+ if this program runs on multibyte locale. */ -+static int force_singlebyte_mode; -+ - /* If true do not output lines containing no delimiter characters. - Otherwise, all such lines are printed. This option is valid only - with field mode. */ -@@ -126,6 +201,9 @@ static bool complement; - - /* The delimiter character for field mode. */ - static unsigned char delim; -+#if HAVE_WCHAR_H -+static wchar_t wcdelim; -+#endif - - /* True if the --output-delimiter=STRING option was specified. 
*/ - static bool output_delimiter_specified; -@@ -189,7 +267,7 @@ Print selected parts of lines from each - -f, --fields=LIST select only these fields; also print any line\n\ - that contains no delimiter character, unless\n\ - the -s option is specified\n\ -- -n (ignored)\n\ -+ -n with -b: don't split multibyte characters\n\ - "), stdout); - fputs (_("\ - --complement complement the set of selected bytes, characters\n\ -@@ -380,6 +458,9 @@ set_fields (const char *fieldstr) - if (operating_mode == byte_mode) - error (0, 0, - _("byte offset %s is too large"), quote (bad_num)); -+ else if (operating_mode == character_mode) -+ error (0, 0, -+ _("character offset %s is too large"), quote (bad_num)); - else - error (0, 0, - _("field number %s is too large"), quote (bad_num)); -@@ -504,6 +585,82 @@ cut_bytes (FILE *stream) - } - } - -+#if HAVE_MBRTOWC -+/* This function is in use for the following case. -+ -+ 1. Read from the stream STREAM, printing to standard output any selected -+ characters. -+ -+ 2. Read from stream STREAM, printing to standard output any selected bytes, -+ without splitting multibyte characters. */ -+ -+static void -+cut_characters_or_cut_bytes_no_split (FILE *stream) -+{ -+ size_t idx; /* number of bytes or characters in the line so far. */ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos; /* Next read position of BUF. */ -+ size_t buflen; /* The length of the byte sequence in buf. */ -+ wint_t wc; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state; /* State of the stream. */ -+ bool convfail = false; /* true, when conversion failed. Otherwise false. */ -+ /* Whether to begin printing delimiters between ranges for the current line. -+ Set after we've begun printing data corresponding to the first range. 
*/ -+ bool print_delimiter = false; -+ -+ idx = 0; -+ buflen = 0; -+ bufpos = buf; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ current_rp = rp; -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); -+ (void) convfail; /* ignore unused */ -+ -+ if (wc == WEOF) -+ { -+ if (idx > 0) -+ putchar ('\n'); -+ break; -+ } -+ else if (wc == L'\n') -+ { -+ putchar ('\n'); -+ idx = 0; -+ print_delimiter = false; -+ current_rp = rp; -+ } -+ else -+ { -+ next_item (&idx); -+ if (print_kth (idx)) -+ { -+ if (output_delimiter_specified) -+ { -+ if (print_delimiter && is_range_start_index (idx)) -+ { -+ fwrite (output_delimiter_string, sizeof (char), -+ output_delimiter_length, stdout); -+ } -+ print_delimiter = true; -+ } -+ fwrite (bufpos, mblength, sizeof(char), stdout); -+ } -+ } -+ -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+} -+#endif -+ - /* Read from stream STREAM, printing to standard output any selected fields. */ - - static void -@@ -648,13 +805,211 @@ cut_fields (FILE *stream) - } - } - -+#if HAVE_MBRTOWC -+static void -+cut_fields_mb (FILE *stream) -+{ -+ int c; -+ size_t field_idx; -+ int found_any_selected_field; -+ int buffer_first_field; -+ int empty_input; -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos; /* Next read position of BUF. */ -+ size_t buflen; /* The length of the byte sequence in buf. */ -+ wint_t wc = 0; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state; /* State of the stream. */ -+ bool convfail = false; /* true, when conversion failed. Otherwise false. 
*/ -+ -+ current_rp = rp; -+ -+ found_any_selected_field = 0; -+ field_idx = 1; -+ bufpos = buf; -+ buflen = 0; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ c = getc (stream); -+ empty_input = (c == EOF); -+ if (c != EOF) -+ { -+ ungetc (c, stream); -+ wc = 0; -+ } -+ else -+ wc = WEOF; -+ -+ /* To support the semantics of the -s flag, we may have to buffer -+ all of the first field to determine whether it is `delimited.' -+ But that is unnecessary if all non-delimited lines must be printed -+ and the first field has been selected, or if non-delimited lines -+ must be suppressed and the first field has *not* been selected. -+ That is because a non-delimited line has exactly one field. */ -+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); -+ -+ while (1) -+ { -+ if (field_idx == 1 && buffer_first_field) -+ { -+ int len = 0; -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER -+ (wc, bufpos, buflen, mblength, state, convfail); -+ -+ if (wc == WEOF) -+ break; -+ -+ field_1_buffer = xrealloc (field_1_buffer, len + mblength); -+ memcpy (field_1_buffer + len, bufpos, mblength); -+ len += mblength; -+ buflen -= mblength; -+ bufpos += mblength; -+ -+ if (!convfail && (wc == L'\n' || wc == wcdelim)) -+ break; -+ } -+ -+ if (len <= 0 && wc == WEOF) -+ break; -+ -+ /* If the first field extends to the end of line (it is not -+ delimited) and we are printing all non-delimited lines, -+ print this one. */ -+ if (convfail || (!convfail && wc != wcdelim)) -+ { -+ if (suppress_non_delimited) -+ { -+ /* Empty. */ -+ } -+ else -+ { -+ fwrite (field_1_buffer, sizeof (char), len, stdout); -+ /* Make sure the output line is newline terminated. */ -+ if (convfail || (!convfail && wc != L'\n')) -+ putchar ('\n'); -+ } -+ continue; -+ } -+ -+ if (print_kth (1)) -+ { -+ /* Print the field, but not the trailing delimiter. 
*/ -+ fwrite (field_1_buffer, sizeof (char), len - 1, stdout); -+ found_any_selected_field = 1; -+ } -+ next_item (&field_idx); -+ } -+ -+ if (wc != WEOF) -+ { -+ if (print_kth (field_idx)) -+ { -+ if (found_any_selected_field) -+ { -+ fwrite (output_delimiter_string, sizeof (char), -+ output_delimiter_length, stdout); -+ } -+ found_any_selected_field = 1; -+ } -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER -+ (wc, bufpos, buflen, mblength, state, convfail); -+ -+ if (wc == WEOF) -+ break; -+ else if (!convfail && (wc == wcdelim || wc == L'\n')) -+ { -+ buflen -= mblength; -+ bufpos += mblength; -+ break; -+ } -+ -+ if (print_kth (field_idx)) -+ fwrite (bufpos, mblength, sizeof(char), stdout); -+ -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+ } -+ -+ if ((!convfail || wc == L'\n') && buflen < 1) -+ wc = WEOF; -+ -+ if (!convfail && wc == wcdelim) -+ next_item (&field_idx); -+ else if (wc == WEOF || (!convfail && wc == L'\n')) -+ { -+ if (found_any_selected_field -+ || (!empty_input && !(suppress_non_delimited && field_idx == 1))) -+ putchar ('\n'); -+ if (wc == WEOF) -+ break; -+ field_idx = 1; -+ current_rp = rp; -+ found_any_selected_field = 0; -+ } -+ } -+} -+#endif -+ - static void - cut_stream (FILE *stream) - { -- if (operating_mode == byte_mode) -- cut_bytes (stream); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) -+ { -+ switch (operating_mode) -+ { -+ case byte_mode: -+ if (byte_mode_character_aware) -+ cut_characters_or_cut_bytes_no_split (stream); -+ else -+ cut_bytes (stream); -+ break; -+ -+ case character_mode: -+ cut_characters_or_cut_bytes_no_split (stream); -+ break; -+ -+ case field_mode: -+ if (delimlen == 1) -+ { -+ /* Check if we have utf8 multibyte locale, so we can use this -+ optimization because of uniqueness of characters, which is -+ not true for e.g. 
SJIS */ -+ char * loc = setlocale(LC_CTYPE, NULL); -+ if (loc && (strstr (loc, "UTF-8") || strstr (loc, "utf-8") || -+ strstr (loc, "UTF8") || strstr (loc, "utf8"))) -+ { -+ cut_fields (stream); -+ break; -+ } -+ } -+ cut_fields_mb (stream); -+ break; -+ -+ default: -+ abort (); -+ } -+ } - else -- cut_fields (stream); -+#endif -+ { -+ if (operating_mode == field_mode) -+ cut_fields (stream); -+ else -+ cut_bytes (stream); -+ } - } - - /* Process file FILE to standard output. -@@ -706,6 +1061,7 @@ main (int argc, char **argv) - bool ok; - bool delim_specified = false; - char *spec_list_string IF_LINT ( = NULL); -+ char mbdelim[MB_LEN_MAX + 1]; - - initialize_main (&argc, &argv); - set_program_name (argv[0]); -@@ -728,7 +1084,6 @@ main (int argc, char **argv) - switch (optc) - { - case 'b': -- case 'c': - /* Build the byte list. */ - if (operating_mode != undefined_mode) - FATAL_ERROR (_("only one type of list may be specified")); -@@ -736,6 +1091,14 @@ main (int argc, char **argv) - spec_list_string = optarg; - break; - -+ case 'c': -+ /* Build the character list. */ -+ if (operating_mode != undefined_mode) -+ FATAL_ERROR (_("only one type of list may be specified")); -+ operating_mode = character_mode; -+ spec_list_string = optarg; -+ break; -+ - case 'f': - /* Build the field list. */ - if (operating_mode != undefined_mode) -@@ -747,10 +1110,38 @@ main (int argc, char **argv) - case 'd': - /* New delimiter. */ - /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ -- if (optarg[0] != '\0' && optarg[1] != '\0') -- FATAL_ERROR (_("the delimiter must be a single character")); -- delim = optarg[0]; -- delim_specified = true; -+ { -+#if HAVE_MBRTOWC -+ if(MB_CUR_MAX > 1) -+ { -+ mbstate_t state; -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); -+ -+ if (delimlen == (size_t)-1 || delimlen == (size_t)-2) -+ ++force_singlebyte_mode; -+ else -+ { -+ delimlen = (delimlen < 1) ? 
1 : delimlen; -+ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') -+ FATAL_ERROR (_("the delimiter must be a single character")); -+ memcpy (mbdelim, optarg, delimlen); -+ mbdelim[delimlen] = '\0'; -+ if (delimlen == 1) -+ delim = *optarg; -+ } -+ } -+ -+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) -+#endif -+ { -+ if (optarg[0] != '\0' && optarg[1] != '\0') -+ FATAL_ERROR (_("the delimiter must be a single character")); -+ delim = (unsigned char) optarg[0]; -+ } -+ delim_specified = true; -+ } - break; - - case OUTPUT_DELIMITER_OPTION: -@@ -763,6 +1154,7 @@ main (int argc, char **argv) - break; - - case 'n': -+ byte_mode_character_aware = 1; - break; - - case 's': -@@ -802,15 +1194,34 @@ main (int argc, char **argv) - } - - if (!delim_specified) -- delim = '\t'; -+ { -+ delim = '\t'; -+#ifdef HAVE_MBRTOWC -+ wcdelim = L'\t'; -+ mbdelim[0] = '\t'; -+ mbdelim[1] = '\0'; -+ delimlen = 1; -+#endif -+ } - - if (output_delimiter_string == NULL) - { -- static char dummy[2]; -- dummy[0] = delim; -- dummy[1] = '\0'; -- output_delimiter_string = dummy; -- output_delimiter_length = 1; -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) -+ { -+ output_delimiter_string = xstrdup(mbdelim); -+ output_delimiter_length = delimlen; -+ } -+ -+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) -+#endif -+ { -+ static char dummy[2]; -+ dummy[0] = delim; -+ dummy[1] = '\0'; -+ output_delimiter_string = dummy; -+ output_delimiter_length = 1; -+ } - } - - if (optind == argc) diff -urNp coreutils-8.24-orig/src/fold.c coreutils-8.24/src/fold.c --- coreutils-8.24-orig/src/fold.c 2015-06-26 19:05:22.000000000 +0200 +++ coreutils-8.24/src/fold.c 2015-07-05 09:04:33.029546958 +0200 diff --git a/coreutils.spec b/coreutils.spec index 31cba83..a4c3ed1 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,7 +1,7 @@ Summary: A set of basic GNU tools commonly used in shell scripts Name: coreutils Version: 8.24 -Release: 106%{?dist} +Release: 107%{?dist} License: GPLv3+ Group: 
System Environment/Base Url: http://www.gnu.org/software/coreutils/ @@ -52,6 +52,8 @@ Patch713: coreutils-4.5.3-langinfo.patch Patch800: coreutils-i18n.patch # (sb) lin18nux/lsb compliance - expand/unexpand Patch801: coreutils-i18n-expand-unexpand.patch +# (sb) lin18nux/lsb compliance - cut +Patch802: coreutils-i18n-cut.patch #getgrouplist() patch from Ulrich Drepper. Patch908: coreutils-getgrouplist.patch @@ -184,6 +186,7 @@ including documentation and translations. # li18nux/lsb %patch800 -p1 -b .i18n %patch801 -p1 -b .i18n-expand +%patch802 -p1 -b .i18n-cut # Coreutils %patch908 -p1 -b .getgrouplist @@ -356,6 +359,9 @@ fi %license COPYING %changelog +* Fri Jan 15 2016 Ondrej Oprala - 8.24-107 +- Use the new i18n implementation for the cut utility + * Wed Jan 13 2016 Ondrej Vasik - 8.24-106 - mv: prevent dataloss when source dir is specified multiple times (#1297464, by P.Brady)