Tree - rpms/grep - CentOS Git server

rpms / grep

Blame grep-2.20-pcre-backported-fixes.patch

Blob History Raw

		23be49	`diff --git a/src/grep.h b/src/grep.h`
		23be49	`index 4935872..729c906 100644`
		23be49	`--- a/src/grep.h`
		23be49	`+++ b/src/grep.h`
		23be49	`@@ -27,4 +27,19 @@ extern int match_words; /* -w */`
		23be49	`extern int match_lines; /* -x */`
		23be49	`extern unsigned char eolbyte; /* -z */`
		23be49
		23be49	`+/* An enum textbin describes the file's type, inferred from data read`
		23be49	`+ before the first line is selected for output. */`
		23be49	`+enum textbin`
		23be49	`+ {`
		23be49	`+ /* Binary, as it contains null bytes and the -z option is not in effect,`
		23be49	`+ or it contains encoding errors. */`
		23be49	`+ TEXTBIN_BINARY = -1,`
		23be49	`+`
		23be49	`+ /* Not known yet. Only text has been seen so far. */`
		23be49	`+ TEXTBIN_UNKNOWN = 0,`
		23be49	`+`
		23be49	`+ /* Text. */`
		23be49	`+ TEXTBIN_TEXT = 1`
		23be49	`+ };`
		23be49	`+`
		23be49	`#endif`
		23be49	`diff --git a/src/pcresearch.c b/src/pcresearch.c`
		23be49	`index 820dd00..9938ffc 100644`
		23be49	`--- a/src/pcresearch.c`
		23be49	`+++ b/src/pcresearch.c`
		23be49	`@@ -33,13 +33,19 @@ static pcre *cre;`
		23be49	`/* Additional information about the pattern. */`
		23be49	`static pcre_extra *extra;`
		23be49
		23be49	`-# ifdef PCRE_STUDY_JIT_COMPILE`
		23be49	`-static pcre_jit_stack *jit_stack;`
		23be49	`-# else`
		23be49	`+# ifndef PCRE_STUDY_JIT_COMPILE`
		23be49	`# define PCRE_STUDY_JIT_COMPILE 0`
		23be49	`# endif`
		23be49	`#endif`
		23be49
		23be49	`+/* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty`
		23be49	`+ string matches when that flag is used. */`
		23be49	`+static int empty_match[2];`
		23be49	`+`
		23be49	`+/* This must be at least 2; everything after that is for performance`
		23be49	`+ in pcre_exec. */`
		23be49	`+enum { NSUB = 300 };`
		23be49	`+`
		23be49	`void`
		23be49	`Pcompile (char const *pattern, size_t size)`
		23be49	`{`
		23be49	`@@ -52,13 +58,17 @@ Pcompile (char const *pattern, size_t size)`
		23be49	`char const *ep;`
		23be49	`char *re = xnmalloc (4, size + 7);`
		23be49	`int flags = (PCRE_MULTILINE`
		23be49	`- | (match_icase ? PCRE_CASELESS : 0)`
		23be49	`- | (using_utf8 () ? PCRE_UTF8 : 0));`
		23be49	`+ | (match_icase ? PCRE_CASELESS : 0));`
		23be49	`char const *patlim = pattern + size;`
		23be49	`char *n = re;`
		23be49	`char const *p;`
		23be49	`char const *pnul;`
		23be49
		23be49	`+ if (using_utf8 ())`
		23be49	`+ flags |= PCRE_UTF8;`
		23be49	`+ else if (MB_CUR_MAX != 1)`
		23be49	`+ error (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));`
		23be49	`+`
		23be49	`/* FIXME: Remove these restrictions. */`
		23be49	`if (memchr (pattern, '\n', size))`
		23be49	`error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));`
		23be49	`@@ -114,14 +124,20 @@ Pcompile (char const *pattern, size_t size)`
		23be49	`/* A 32K stack is allocated for the machine code by default, which`
		23be49	`can grow to 512K if necessary. Since JIT uses far less memory`
		23be49	`than the interpreter, this should be enough in practice. */`
		23be49	`- jit_stack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);`
		23be49	`+ pcre_jit_stack *jit_stack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);`
		23be49	`if (!jit_stack)`
		23be49	`error (EXIT_TROUBLE, 0,`
		23be49	`_("failed to allocate memory for the PCRE JIT stack"));`
		23be49	`pcre_assign_jit_stack (extra, NULL, jit_stack);`
		23be49	`}`
		23be49	`+`
		23be49	`# endif`
		23be49	`free (re);`
		23be49	`+`
		23be49	`+ int sub[NSUB];`
		23be49	`+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,`
		23be49	`+ PCRE_NOTBOL, sub, NSUB);`
		23be49	`+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);`
		23be49	`#endif /* HAVE_LIBPCRE */`
		23be49	`}`
		23be49
		23be49	`@@ -134,36 +150,110 @@ Pexecute (char const *buf, size_t size, size_t *match_size,`
		23be49	`error (EXIT_TROUBLE, 0, _("internal error"));`
		23be49	`return -1;`
		23be49	`#else`
		23be49	`- /* This array must have at least two elements; everything after that`
		23be49	`- is just for performance improvement in pcre_exec. */`
		23be49	`- int sub[300];`
		23be49	`-`
		23be49	`- const char *line_buf, *line_end, *line_next;`
		23be49	`+ int sub[NSUB];`
		23be49	`+ char const *p = start_ptr ? start_ptr : buf;`
		23be49	`+ bool bol = p[-1] == eolbyte;`
		23be49	`+ char const *line_start = buf;`
		23be49	`int e = PCRE_ERROR_NOMATCH;`
		23be49	`- ptrdiff_t start_ofs = start_ptr ? start_ptr - buf : 0;`
		23be49	`+ char const *line_end;`
		23be49
		23be49	`- /* PCRE can't limit the matching to single lines, therefore we have to`
		23be49	`- match each line in the buffer separately. */`
		23be49	`- for (line_next = buf;`
		23be49	`- e == PCRE_ERROR_NOMATCH && line_next < buf + size;`
		23be49	`- start_ofs -= line_next - line_buf)`
		23be49	`+ /* If the input type is unknown, the caller is still testing the`
		23be49	`+ input, which means the current buffer cannot contain encoding`
		23be49	`+ errors and a multiline search is typically more efficient.`
		23be49	`+ Otherwise, a single-line search is typically faster, so that`
		23be49	`+ pcre_exec doesn't waste time validating the entire input`
		23be49	`+ buffer. */`
		23be49	`+ bool multiline = TEXTBIN_UNKNOWN;`
		23be49	`+`
		23be49	`+ for (; p < buf + size; p = line_start = line_end + 1)`
		23be49	`{`
		23be49	`- line_buf = line_next;`
		23be49	`- line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf);`
		23be49	`- if (line_end == NULL)`
		23be49	`- line_next = line_end = buf + size;`
		23be49	`- else`
		23be49	`- line_next = line_end + 1;`
		23be49	`+ bool too_big;`
		23be49
		23be49	`- if (start_ptr && start_ptr >= line_end)`
		23be49	`- continue;`
		23be49	`+ if (multiline)`
		23be49	`+ {`
		23be49	`+ size_t pcre_size_max = MIN (INT_MAX, SIZE_MAX - 1);`
		23be49	`+ size_t scan_size = MIN (pcre_size_max + 1, buf + size - p);`
		23be49	`+ line_end = memrchr (p, eolbyte, scan_size);`
		23be49	`+ too_big = ! line_end;`
		23be49	`+ }`
		23be49	`+ else`
		23be49	`+ {`
		23be49	`+ line_end = memchr (p, eolbyte, buf + size - p);`
		23be49	`+ too_big = INT_MAX < line_end - p;`
		23be49	`+ }`
		23be49
		23be49	`- if (INT_MAX < line_end - line_buf)`
		23be49	`+ if (too_big)`
		23be49	`error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));`
		23be49
		23be49	`- e = pcre_exec (cre, extra, line_buf, line_end - line_buf,`
		23be49	`- start_ofs < 0 ? 0 : start_ofs, 0,`
		23be49	`- sub, sizeof sub / sizeof *sub);`
		23be49	`+ for (;;)`
		23be49	`+ {`
		23be49	`+ /* Skip past bytes that are easily determined to be encoding`
		23be49	`+ errors, treating them as data that cannot match. This is`
		23be49	`+ faster than having pcre_exec check them. */`
		23be49	`+ while (mbclen_cache[to_uchar (*p)] == (size_t) -1)`
		23be49	`+ {`
		23be49	`+ p++;`
		23be49	`+ bol = false;`
		23be49	`+ }`
		23be49	`+`
		23be49	`+ /* Check for an empty match; this is faster than letting`
		23be49	`+ pcre_exec do it. */`
		23be49	`+ int search_bytes = line_end - p;`
		23be49	`+ if (search_bytes == 0)`
		23be49	`+ {`
		23be49	`+ sub[0] = sub[1] = 0;`
		23be49	`+ e = empty_match[bol];`
		23be49	`+ break;`
		23be49	`+ }`
		23be49	`+`
		23be49	`+ int options = 0;`
		23be49	`+ if (!bol)`
		23be49	`+ options |= PCRE_NOTBOL;`
		23be49	`+ if (multiline)`
		23be49	`+ options |= PCRE_NO_UTF8_CHECK;`
		23be49	`+`
		23be49	`+ e = pcre_exec (cre, extra, p, search_bytes, 0,`
		23be49	`+ options, sub, NSUB);`
		23be49	`+ if (e != PCRE_ERROR_BADUTF8)`
		23be49	`+ {`
		23be49	`+ if (0 < e && multiline && sub[1] - sub[0] != 0)`
		23be49	`+ {`
		23be49	`+ char const *nl = memchr (p + sub[0], eolbyte,`
		23be49	`+ sub[1] - sub[0]);`
		23be49	`+ if (nl)`
		23be49	`+ {`
		23be49	`+ /* This match crosses a line boundary; reject it. */`
		23be49	`+ p += sub[0];`
		23be49	`+ line_end = nl;`
		23be49	`+ continue;`
		23be49	`+ }`
		23be49	`+ }`
		23be49	`+ break;`
		23be49	`+ }`
		23be49	`+ int valid_bytes = sub[0];`
		23be49	`+`
		23be49	`+ /* Try to match the string before the encoding error.`
		23be49	`+ Again, handle the empty-match case specially, for speed. */`
		23be49	`+ if (valid_bytes == 0)`
		23be49	`+ {`
		23be49	`+ sub[1] = 0;`
		23be49	`+ e = empty_match[bol];`
		23be49	`+ }`
		23be49	`+ else`
		23be49	`+ e = pcre_exec (cre, extra, p, valid_bytes, 0,`
		23be49	`+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,`
		23be49	`+ sub, NSUB);`
		23be49	`+ if (e != PCRE_ERROR_NOMATCH)`
		23be49	`+ break;`
		23be49	`+`
		23be49	`+ /* Treat the encoding error as data that cannot match. */`
		23be49	`+ p += valid_bytes + 1;`
		23be49	`+ bol = false;`
		23be49	`+ }`
		23be49	`+`
		23be49	`+ if (e != PCRE_ERROR_NOMATCH)`
		23be49	`+ break;`
		23be49	`+ bol = true;`
		23be49	`}`
		23be49
		23be49	`if (e <= 0)`
		23be49	`@@ -171,7 +261,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,`
		23be49	`switch (e)`
		23be49	`{`
		23be49	`case PCRE_ERROR_NOMATCH:`
		23be49	`- return -1;`
		23be49	`+ break;`
		23be49
		23be49	`case PCRE_ERROR_NOMEMORY:`
		23be49	`error (EXIT_TROUBLE, 0, _("memory exhausted"));`
		23be49	`@@ -180,10 +270,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,`
		23be49	`error (EXIT_TROUBLE, 0,`
		23be49	`_("exceeded PCRE's backtracking limit"));`
		23be49
		23be49	`- case PCRE_ERROR_BADUTF8:`
		23be49	`- error (EXIT_TROUBLE, 0,`
		23be49	`- _("invalid UTF-8 byte sequence in input"));`
		23be49	`-`
		23be49	`default:`
		23be49	`/* For now, we lump all remaining PCRE failures into this basket.`
		23be49	`If anyone cares to provide sample grep usage that can trigger`
		23be49	`@@ -192,30 +278,33 @@ Pexecute (char const *buf, size_t size, size_t *match_size,`
		23be49	`error (EXIT_TROUBLE, 0, _("internal PCRE error: %d"), e);`
		23be49	`}`
		23be49
		23be49	`- /* NOTREACHED */`
		23be49	`return -1;`
		23be49	`}`
		23be49	`else`
		23be49	`{`
		23be49	`- /* Narrow down to the line we've found. */`
		23be49	`- char const *beg = line_buf + sub[0];`
		23be49	`- char const *end = line_buf + sub[1];`
		23be49	`- char const *buflim = buf + size;`
		23be49	`- char eol = eolbyte;`
		23be49	`- if (!start_ptr)`
		23be49	`+ char const *matchbeg = p + sub[0];`
		23be49	`+ char const *matchend = p + sub[1];`
		23be49	`+ char const *beg;`
		23be49	`+ char const *end;`
		23be49	`+ if (start_ptr)`
		23be49	`{`
		23be49	`- /* FIXME: The case when '\n' is not found indicates a bug:`
		23be49	`- Since grep is line oriented, the match should never contain`
		23be49	`- a newline, so there _must_ be a newline following.`
		23be49	`- */`
		23be49	`- if (!(end = memchr (end, eol, buflim - end)))`
		23be49	`- end = buflim;`
		23be49	`- else`
		23be49	`- end++;`
		23be49	`- while (buf < beg && beg[-1] != eol)`
		23be49	`- --beg;`
		23be49	`+ beg = matchbeg;`
		23be49	`+ end = matchend;`
		23be49	`+ }`
		23be49	`+ else if (multiline)`
		23be49	`+ {`
		23be49	`+ char const *prev_nl = memrchr (line_start - 1, eolbyte,`
		23be49	`+ matchbeg - (line_start - 1));`
		23be49	`+ char const *next_nl = memchr (matchend, eolbyte,`
		23be49	`+ line_end + 1 - matchend);`
		23be49	`+ beg = prev_nl + 1;`
		23be49	`+ end = next_nl + 1;`
		23be49	`+ }`
		23be49	`+ else`
		23be49	`+ {`
		23be49	`+ beg = line_start;`
		23be49	`+ end = line_end + 1;`
		23be49	`}`
		23be49	`-`
		23be49	`*match_size = end - beg;`
		23be49	`return beg - buf;`
		23be49	`}`
		23be49	`diff --git a/src/search.h b/src/search.h`
		23be49	`index 14877bc..e671bea 100644`
		23be49	`--- a/src/search.h`
		23be49	`+++ b/src/search.h`
		23be49	`@@ -45,6 +45,7 @@ extern void kwsinit (kwset_t *);`
		23be49
		23be49	`extern char *mbtoupper (char const *, size_t *, mb_len_map_t **);`
		23be49	`extern void build_mbclen_cache (void);`
		23be49	`+extern size_t mbclen_cache[];`
		23be49	`extern ptrdiff_t mb_goback (char const **, char const *, char const *);`
		23be49	`extern wint_t mb_prev_wc (char const *, char const *, char const *);`
		23be49	`extern wint_t mb_next_wc (char const *, char const *);`
		23be49	`diff --git a/src/searchutils.c b/src/searchutils.c`
		23be49	`index 5eb9a12..aba9335 100644`
		23be49	`--- a/src/searchutils.c`
		23be49	`+++ b/src/searchutils.c`
		23be49	`@@ -22,7 +22,7 @@`
		23be49
		23be49	`#define NCHAR (UCHAR_MAX + 1)`
		23be49
		23be49	`-static size_t mbclen_cache[NCHAR];`
		23be49	`+size_t mbclen_cache[NCHAR];`
		23be49
		23be49	`void`
		23be49	`kwsinit (kwset_t *kwset)`
		23be49	`diff --git a/tests/pcre-infloop b/tests/pcre-infloop`
		23be49	`index 1b33e72..8054844 100755`
		23be49	`--- a/tests/pcre-infloop`
		23be49	`+++ b/tests/pcre-infloop`
		23be49	`@@ -18,16 +18,16 @@`
		23be49	`# along with this program. If not, see <http://www.gnu.org/licenses/>.`
		23be49
		23be49	`. "${srcdir=.}/init.sh"; path_prepend_ ../src`
		23be49	`-require_pcre_`
		23be49	`require_timeout_`
		23be49	`require_en_utf8_locale_`
		23be49	`require_compiled_in_MB_support`
		23be49	`+LC_ALL=en_US.UTF-8 require_pcre_`
		23be49
		23be49	`printf 'a\201b\r' > in || framework_failure_`
		23be49
		23be49	`fail=0`
		23be49
		23be49	`LC_ALL=en_US.UTF-8 timeout 3 grep -P 'a.?..b' in`
		23be49	`-test $? = 2 || fail_ "libpcre's match function appears to infloop"`
		23be49	`+test $? = 1 || fail_ "libpcre's match function appears to infloop"`
		23be49
		23be49	`Exit $fail`
		23be49	`diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input`
		23be49	`index 913e8ee..abcc7e8 100755`
		23be49	`--- a/tests/pcre-invalid-utf8-input`
		23be49	`+++ b/tests/pcre-invalid-utf8-input`
		23be49	`@@ -8,14 +8,19 @@`
		23be49	`# notice and this notice are preserved.`
		23be49
		23be49	`. "${srcdir=.}/init.sh"; path_prepend_ ../src`
		23be49	`-require_pcre_`
		23be49	`+require_timeout_`
		23be49	`require_en_utf8_locale_`
		23be49	`+require_compiled_in_MB_support`
		23be49	`+LC_ALL=en_US.UTF-8 require_pcre_`
		23be49
		23be49	`fail=0`
		23be49
		23be49	`-printf 'j\202\nj\n' > in || framework_failure_`
		23be49	`+printf 'j\202j\nj\nk\202\n' > in || framework_failure_`
		23be49
		23be49	`-LC_ALL=en_US.UTF-8 grep -P j in`
		23be49	`-test $? -eq 2 || fail=1`
		23be49	`+LC_ALL=en_US.UTF-8 timeout 3 grep -P j in`
		23be49	`+test $? -eq 0 || fail=1`
		23be49	`+`
		23be49	`+LC_ALL=en_US.UTF-8 timeout 3 grep -P 'k$' in`
		23be49	`+test $? -eq 1 || fail=1`
		23be49
		23be49	`Exit $fail`
		23be49	`diff --git a/tests/pcre-utf8 b/tests/pcre-utf8`
		23be49	`index 41676f4..2dda116 100755`
		23be49	`--- a/tests/pcre-utf8`
		23be49	`+++ b/tests/pcre-utf8`
		23be49	`@@ -8,8 +8,8 @@`
		23be49	`# notice and this notice are preserved.`
		23be49
		23be49	`. "${srcdir=.}/init.sh"; path_prepend_ ../src`
		23be49	`-require_pcre_`
		23be49	`require_en_utf8_locale_`
		23be49	`+LC_ALL=en_US.UTF-8 require_pcre_`
		23be49
		23be49	`fail=0`
		23be49

rpms / grep

Source Code

Blame grep-2.20-pcre-backported-fixes.patch