Tree - rpms/grep - CentOS Git server

rpms / grep

Blame SOURCES/grep-2.20-pcre-backported-fixes.patch

Blob History Raw

		51e48f	`diff --git a/src/grep.h b/src/grep.h`
		51e48f	`index 4935872..729c906 100644`
		51e48f	`--- a/src/grep.h`
		51e48f	`+++ b/src/grep.h`
		51e48f	`@@ -27,4 +27,19 @@ extern int match_words; /* -w */`
		51e48f	`extern int match_lines; /* -x */`
		51e48f	`extern unsigned char eolbyte; /* -z */`
		51e48f
		51e48f	`+/* An enum textbin describes the file's type, inferred from data read`
		51e48f	`+ before the first line is selected for output. */`
		51e48f	`+enum textbin`
		51e48f	`+ {`
		51e48f	`+ /* Binary, as it contains null bytes and the -z option is not in effect,`
		51e48f	`+ or it contains encoding errors. */`
		51e48f	`+ TEXTBIN_BINARY = -1,`
		51e48f	`+`
		51e48f	`+ /* Not known yet. Only text has been seen so far. */`
		51e48f	`+ TEXTBIN_UNKNOWN = 0,`
		51e48f	`+`
		51e48f	`+ /* Text. */`
		51e48f	`+ TEXTBIN_TEXT = 1`
		51e48f	`+ };`
		51e48f	`+`
		51e48f	`#endif`
		51e48f	`diff --git a/src/pcresearch.c b/src/pcresearch.c`
		51e48f	`index 820dd00..9938ffc 100644`
		51e48f	`--- a/src/pcresearch.c`
		51e48f	`+++ b/src/pcresearch.c`
		51e48f	`@@ -33,13 +33,19 @@ static pcre *cre;`
		51e48f	`/* Additional information about the pattern. */`
		51e48f	`static pcre_extra *extra;`
		51e48f
		51e48f	`-# ifdef PCRE_STUDY_JIT_COMPILE`
		51e48f	`-static pcre_jit_stack *jit_stack;`
		51e48f	`-# else`
		51e48f	`+# ifndef PCRE_STUDY_JIT_COMPILE`
		51e48f	`# define PCRE_STUDY_JIT_COMPILE 0`
		51e48f	`# endif`
		51e48f	`#endif`
		51e48f
		51e48f	`+/* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty`
		51e48f	`+ string matches when that flag is used. */`
		51e48f	`+static int empty_match[2];`
		51e48f	`+`
		51e48f	`+/* This must be at least 2; everything after that is for performance`
		51e48f	`+ in pcre_exec. */`
		51e48f	`+enum { NSUB = 300 };`
		51e48f	`+`
		51e48f	`void`
		51e48f	`Pcompile (char const *pattern, size_t size)`
		51e48f	`{`
		51e48f	`@@ -52,13 +58,17 @@ Pcompile (char const *pattern, size_t size)`
		51e48f	`char const *ep;`
		51e48f	`char *re = xnmalloc (4, size + 7);`
		51e48f	`int flags = (PCRE_MULTILINE`
		51e48f	`- \| (match_icase ? PCRE_CASELESS : 0)`
		51e48f	`- \| (using_utf8 () ? PCRE_UTF8 : 0));`
		51e48f	`+ \| (match_icase ? PCRE_CASELESS : 0));`
		51e48f	`char const *patlim = pattern + size;`
		51e48f	`char *n = re;`
		51e48f	`char const *p;`
		51e48f	`char const *pnul;`
		51e48f
		51e48f	`+ if (using_utf8 ())`
		51e48f	`+ flags \|= PCRE_UTF8;`
		51e48f	`+ else if (MB_CUR_MAX != 1)`
		51e48f	`+ error (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));`
		51e48f	`+`
		51e48f	`/* FIXME: Remove these restrictions. */`
		51e48f	`if (memchr (pattern, '\n', size))`
		51e48f	`error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));`
		51e48f	`@@ -114,14 +124,20 @@ Pcompile (char const *pattern, size_t size)`
		51e48f	`/* A 32K stack is allocated for the machine code by default, which`
		51e48f	`can grow to 512K if necessary. Since JIT uses far less memory`
		51e48f	`than the interpreter, this should be enough in practice. */`
		51e48f	`- jit_stack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);`
		51e48f	`+ pcre_jit_stack *jit_stack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);`
		51e48f	`if (!jit_stack)`
		51e48f	`error (EXIT_TROUBLE, 0,`
		51e48f	`_("failed to allocate memory for the PCRE JIT stack"));`
		51e48f	`pcre_assign_jit_stack (extra, NULL, jit_stack);`
		51e48f	`}`
		51e48f	`+`
		51e48f	`# endif`
		51e48f	`free (re);`
		51e48f	`+`
		51e48f	`+ int sub[NSUB];`
		51e48f	`+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,`
		51e48f	`+ PCRE_NOTBOL, sub, NSUB);`
		51e48f	`+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);`
		51e48f	`#endif /* HAVE_LIBPCRE */`
		51e48f	`}`
		51e48f
		51e48f	`@@ -134,36 +150,110 @@ Pexecute (char const *buf, size_t size, size_t *match_size,`
		51e48f	`error (EXIT_TROUBLE, 0, _("internal error"));`
		51e48f	`return -1;`
		51e48f	`#else`
		51e48f	`- /* This array must have at least two elements; everything after that`
		51e48f	`- is just for performance improvement in pcre_exec. */`
		51e48f	`- int sub[300];`
		51e48f	`-`
		51e48f	`- const char *line_buf, *line_end, *line_next;`
		51e48f	`+ int sub[NSUB];`
		51e48f	`+ char const *p = start_ptr ? start_ptr : buf;`
		51e48f	`+ bool bol = p[-1] == eolbyte;`
		51e48f	`+ char const *line_start = buf;`
		51e48f	`int e = PCRE_ERROR_NOMATCH;`
		51e48f	`- ptrdiff_t start_ofs = start_ptr ? start_ptr - buf : 0;`
		51e48f	`+ char const *line_end;`
		51e48f
		51e48f	`- /* PCRE can't limit the matching to single lines, therefore we have to`
		51e48f	`- match each line in the buffer separately. */`
		51e48f	`- for (line_next = buf;`
		51e48f	`- e == PCRE_ERROR_NOMATCH && line_next < buf + size;`
		51e48f	`- start_ofs -= line_next - line_buf)`
		51e48f	`+ /* If the input type is unknown, the caller is still testing the`
		51e48f	`+ input, which means the current buffer cannot contain encoding`
		51e48f	`+ errors and a multiline search is typically more efficient.`
		51e48f	`+ Otherwise, a single-line search is typically faster, so that`
		51e48f	`+ pcre_exec doesn't waste time validating the entire input`
		51e48f	`+ buffer. */`
		51e48f	`+ bool multiline = TEXTBIN_UNKNOWN;`
		51e48f	`+`
		51e48f	`+ for (; p < buf + size; p = line_start = line_end + 1)`
		51e48f	`{`
		51e48f	`- line_buf = line_next;`
		51e48f	`- line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf);`
		51e48f	`- if (line_end == NULL)`
		51e48f	`- line_next = line_end = buf + size;`
		51e48f	`- else`
		51e48f	`- line_next = line_end + 1;`
		51e48f	`+ bool too_big;`
		51e48f
		51e48f	`- if (start_ptr && start_ptr >= line_end)`
		51e48f	`- continue;`
		51e48f	`+ if (multiline)`
		51e48f	`+ {`
		51e48f	`+ size_t pcre_size_max = MIN (INT_MAX, SIZE_MAX - 1);`
		51e48f	`+ size_t scan_size = MIN (pcre_size_max + 1, buf + size - p);`
		51e48f	`+ line_end = memrchr (p, eolbyte, scan_size);`
		51e48f	`+ too_big = ! line_end;`
		51e48f	`+ }`
		51e48f	`+ else`
		51e48f	`+ {`
		51e48f	`+ line_end = memchr (p, eolbyte, buf + size - p);`
		51e48f	`+ too_big = INT_MAX < line_end - p;`
		51e48f	`+ }`
		51e48f
		51e48f	`- if (INT_MAX < line_end - line_buf)`
		51e48f	`+ if (too_big)`
		51e48f	`error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));`
		51e48f
		51e48f	`- e = pcre_exec (cre, extra, line_buf, line_end - line_buf,`
		51e48f	`- start_ofs < 0 ? 0 : start_ofs, 0,`
		51e48f	`- sub, sizeof sub / sizeof *sub);`
		51e48f	`+ for (;;)`
		51e48f	`+ {`
		51e48f	`+ /* Skip past bytes that are easily determined to be encoding`
		51e48f	`+ errors, treating them as data that cannot match. This is`
		51e48f	`+ faster than having pcre_exec check them. */`
		51e48f	`+ while (mbclen_cache[to_uchar (*p)] == (size_t) -1)`
		51e48f	`+ {`
		51e48f	`+ p++;`
		51e48f	`+ bol = false;`
		51e48f	`+ }`
		51e48f	`+`
		51e48f	`+ /* Check for an empty match; this is faster than letting`
		51e48f	`+ pcre_exec do it. */`
		51e48f	`+ int search_bytes = line_end - p;`
		51e48f	`+ if (search_bytes == 0)`
		51e48f	`+ {`
		51e48f	`+ sub[0] = sub[1] = 0;`
		51e48f	`+ e = empty_match[bol];`
		51e48f	`+ break;`
		51e48f	`+ }`
		51e48f	`+`
		51e48f	`+ int options = 0;`
		51e48f	`+ if (!bol)`
		51e48f	`+ options \|= PCRE_NOTBOL;`
		51e48f	`+ if (multiline)`
		51e48f	`+ options \|= PCRE_NO_UTF8_CHECK;`
		51e48f	`+`
		51e48f	`+ e = pcre_exec (cre, extra, p, search_bytes, 0,`
		51e48f	`+ options, sub, NSUB);`
		51e48f	`+ if (e != PCRE_ERROR_BADUTF8)`
		51e48f	`+ {`
		51e48f	`+ if (0 < e && multiline && sub[1] - sub[0] != 0)`
		51e48f	`+ {`
		51e48f	`+ char const *nl = memchr (p + sub[0], eolbyte,`
		51e48f	`+ sub[1] - sub[0]);`
		51e48f	`+ if (nl)`
		51e48f	`+ {`
		51e48f	`+ /* This match crosses a line boundary; reject it. */`
		51e48f	`+ p += sub[0];`
		51e48f	`+ line_end = nl;`
		51e48f	`+ continue;`
		51e48f	`+ }`
		51e48f	`+ }`
		51e48f	`+ break;`
		51e48f	`+ }`
		51e48f	`+ int valid_bytes = sub[0];`
		51e48f	`+`
		51e48f	`+ /* Try to match the string before the encoding error.`
		51e48f	`+ Again, handle the empty-match case specially, for speed. */`
		51e48f	`+ if (valid_bytes == 0)`
		51e48f	`+ {`
		51e48f	`+ sub[1] = 0;`
		51e48f	`+ e = empty_match[bol];`
		51e48f	`+ }`
		51e48f	`+ else`
		51e48f	`+ e = pcre_exec (cre, extra, p, valid_bytes, 0,`
		51e48f	`+ options \| PCRE_NO_UTF8_CHECK \| PCRE_NOTEOL,`
		51e48f	`+ sub, NSUB);`
		51e48f	`+ if (e != PCRE_ERROR_NOMATCH \|\| valid_bytes < 0)`
		51e48f	`+ break;`
		51e48f	`+`
		51e48f	`+ /* Treat the encoding error as data that cannot match. */`
		51e48f	`+ p += valid_bytes + 1;`
		51e48f	`+ bol = false;`
		51e48f	`+ }`
		51e48f	`+`
		51e48f	`+ if (e != PCRE_ERROR_NOMATCH)`
		51e48f	`+ break;`
		51e48f	`+ bol = true;`
		51e48f	`}`
		51e48f
		51e48f	`if (e <= 0)`
		51e48f	`@@ -171,7 +261,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,`
		51e48f	`switch (e)`
		51e48f	`{`
		51e48f	`case PCRE_ERROR_NOMATCH:`
		51e48f	`- return -1;`
		51e48f	`+ break;`
		51e48f
		51e48f	`case PCRE_ERROR_NOMEMORY:`
		51e48f	`error (EXIT_TROUBLE, 0, _("memory exhausted"));`
		51e48f	`@@ -180,10 +270,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,`
		51e48f	`error (EXIT_TROUBLE, 0,`
		51e48f	`_("exceeded PCRE's backtracking limit"));`
		51e48f
		51e48f	`- case PCRE_ERROR_BADUTF8:`
		51e48f	`- error (EXIT_TROUBLE, 0,`
		51e48f	`- _("invalid UTF-8 byte sequence in input"));`
		51e48f	`-`
		51e48f	`default:`
		51e48f	`/* For now, we lump all remaining PCRE failures into this basket.`
		51e48f	`If anyone cares to provide sample grep usage that can trigger`
		51e48f	`@@ -192,30 +278,33 @@ Pexecute (char const *buf, size_t size, size_t *match_size,`
		51e48f	`error (EXIT_TROUBLE, 0, _("internal PCRE error: %d"), e);`
		51e48f	`}`
		51e48f
		51e48f	`- /* NOTREACHED */`
		51e48f	`return -1;`
		51e48f	`}`
		51e48f	`else`
		51e48f	`{`
		51e48f	`- /* Narrow down to the line we've found. */`
		51e48f	`- char const *beg = line_buf + sub[0];`
		51e48f	`- char const *end = line_buf + sub[1];`
		51e48f	`- char const *buflim = buf + size;`
		51e48f	`- char eol = eolbyte;`
		51e48f	`- if (!start_ptr)`
		51e48f	`+ char const *matchbeg = p + sub[0];`
		51e48f	`+ char const *matchend = p + sub[1];`
		51e48f	`+ char const *beg;`
		51e48f	`+ char const *end;`
		51e48f	`+ if (start_ptr)`
		51e48f	`{`
		51e48f	`- /* FIXME: The case when '\n' is not found indicates a bug:`
		51e48f	`- Since grep is line oriented, the match should never contain`
		51e48f	`- a newline, so there _must_ be a newline following.`
		51e48f	`- */`
		51e48f	`- if (!(end = memchr (end, eol, buflim - end)))`
		51e48f	`- end = buflim;`
		51e48f	`- else`
		51e48f	`- end++;`
		51e48f	`- while (buf < beg && beg[-1] != eol)`
		51e48f	`- --beg;`
		51e48f	`+ beg = matchbeg;`
		51e48f	`+ end = matchend;`
		51e48f	`+ }`
		51e48f	`+ else if (multiline)`
		51e48f	`+ {`
		51e48f	`+ char const *prev_nl = memrchr (line_start - 1, eolbyte,`
		51e48f	`+ matchbeg - (line_start - 1));`
		51e48f	`+ char const *next_nl = memchr (matchend, eolbyte,`
		51e48f	`+ line_end + 1 - matchend);`
		51e48f	`+ beg = prev_nl + 1;`
		51e48f	`+ end = next_nl + 1;`
		51e48f	`+ }`
		51e48f	`+ else`
		51e48f	`+ {`
		51e48f	`+ beg = line_start;`
		51e48f	`+ end = line_end + 1;`
		51e48f	`}`
		51e48f	`-`
		51e48f	`*match_size = end - beg;`
		51e48f	`return beg - buf;`
		51e48f	`}`
		51e48f	`diff --git a/src/search.h b/src/search.h`
		51e48f	`index 14877bc..e671bea 100644`
		51e48f	`--- a/src/search.h`
		51e48f	`+++ b/src/search.h`
		51e48f	`@@ -45,6 +45,7 @@ extern void kwsinit (kwset_t *);`
		51e48f
		51e48f	`extern char *mbtoupper (char const *, size_t *, mb_len_map_t **);`
		51e48f	`extern void build_mbclen_cache (void);`
		51e48f	`+extern size_t mbclen_cache[];`
		51e48f	`extern ptrdiff_t mb_goback (char const **, char const *, char const *);`
		51e48f	`extern wint_t mb_prev_wc (char const *, char const *, char const *);`
		51e48f	`extern wint_t mb_next_wc (char const *, char const *);`
		51e48f	`diff --git a/src/searchutils.c b/src/searchutils.c`
		51e48f	`index 5eb9a12..aba9335 100644`
		51e48f	`--- a/src/searchutils.c`
		51e48f	`+++ b/src/searchutils.c`
		51e48f	`@@ -22,7 +22,7 @@`
		51e48f
		51e48f	`#define NCHAR (UCHAR_MAX + 1)`
		51e48f
		51e48f	`-static size_t mbclen_cache[NCHAR];`
		51e48f	`+size_t mbclen_cache[NCHAR];`
		51e48f
		51e48f	`void`
		51e48f	`kwsinit (kwset_t *kwset)`
		51e48f	`diff --git a/tests/pcre-infloop b/tests/pcre-infloop`
		51e48f	`index 1b33e72..8054844 100755`
		51e48f	`--- a/tests/pcre-infloop`
		51e48f	`+++ b/tests/pcre-infloop`
		51e48f	`@@ -18,16 +18,16 @@`
		51e48f	`# along with this program. If not, see <http://www.gnu.org/licenses/>.`
		51e48f
		51e48f	`. "${srcdir=.}/init.sh"; path_prepend_ ../src`
		51e48f	`-require_pcre_`
		51e48f	`require_timeout_`
		51e48f	`require_en_utf8_locale_`
		51e48f	`require_compiled_in_MB_support`
		51e48f	`+LC_ALL=en_US.UTF-8 require_pcre_`
		51e48f
		51e48f	`printf 'a\201b\r' > in \|\| framework_failure_`
		51e48f
		51e48f	`fail=0`
		51e48f
		51e48f	`LC_ALL=en_US.UTF-8 timeout 3 grep -P 'a.?..b' in`
		51e48f	`-test $? = 2 \|\| fail_ "libpcre's match function appears to infloop"`
		51e48f	`+test $? = 1 \|\| fail_ "libpcre's match function appears to infloop"`
		51e48f
		51e48f	`Exit $fail`
		51e48f	`diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input`
		51e48f	`index 913e8ee..abcc7e8 100755`
		51e48f	`--- a/tests/pcre-invalid-utf8-input`
		51e48f	`+++ b/tests/pcre-invalid-utf8-input`
		51e48f	`@@ -8,14 +8,19 @@`
		51e48f	`# notice and this notice are preserved.`
		51e48f
		51e48f	`. "${srcdir=.}/init.sh"; path_prepend_ ../src`
		51e48f	`-require_pcre_`
		51e48f	`+require_timeout_`
		51e48f	`require_en_utf8_locale_`
		51e48f	`+require_compiled_in_MB_support`
		51e48f	`+LC_ALL=en_US.UTF-8 require_pcre_`
		51e48f
		51e48f	`fail=0`
		51e48f
		51e48f	`-printf 'j\202\nj\n' > in \|\| framework_failure_`
		51e48f	`+printf 'j\202j\nj\nk\202\n' > in \|\| framework_failure_`
		51e48f
		51e48f	`-LC_ALL=en_US.UTF-8 grep -P j in`
		51e48f	`-test $? -eq 2 \|\| fail=1`
		51e48f	`+LC_ALL=en_US.UTF-8 timeout 3 grep -P j in`
		51e48f	`+test $? -eq 0 \|\| fail=1`
		51e48f	`+`
		51e48f	`+LC_ALL=en_US.UTF-8 timeout 3 grep -P 'k$' in`
		51e48f	`+test $? -eq 1 \|\| fail=1`
		51e48f
		51e48f	`Exit $fail`
		51e48f	`diff --git a/tests/pcre-utf8 b/tests/pcre-utf8`
		51e48f	`index 41676f4..2dda116 100755`
		51e48f	`--- a/tests/pcre-utf8`
		51e48f	`+++ b/tests/pcre-utf8`
		51e48f	`@@ -8,8 +8,8 @@`
		51e48f	`# notice and this notice are preserved.`
		51e48f
		51e48f	`. "${srcdir=.}/init.sh"; path_prepend_ ../src`
		51e48f	`-require_pcre_`
		51e48f	`require_en_utf8_locale_`
		51e48f	`+LC_ALL=en_US.UTF-8 require_pcre_`
		51e48f
		51e48f	`fail=0`
		51e48f

rpms / grep

Source Code

Blame SOURCES/grep-2.20-pcre-backported-fixes.patch