From: Zephyr Pellerin <zv@nxvr.org>
Date: Mon, 20 Mar 2017 15:36:41 -0700
Subject: [PATCH] Fix CVE-2016-10253
diff --git a/erts/emulator/pcre/pcre_compile.c b/erts/emulator/pcre/pcre_compile.c
index d48126a55d..15a81fae56 100644
--- a/erts/emulator/pcre/pcre_compile.c
+++ b/erts/emulator/pcre/pcre_compile.c
@@ -2335,34 +2335,36 @@ for (;;)
}
}
-
-
/*************************************************
-* Scan compiled branch for non-emptiness *
-*************************************************/
+ * Scan compiled branch for non-emptiness *
+ *************************************************/
/* This function scans through a branch of a compiled pattern to see whether it
-can match the empty string or not. It is called from could_be_empty()
-below and from compile_branch() when checking for an unlimited repeat of a
-group that can match nothing. Note that first_significant_code() skips over
-backward and negative forward assertions when its final argument is TRUE. If we
-hit an unclosed bracket, we return "empty" - this means we've struck an inner
-bracket whose current branch will already have been scanned.
-
-Arguments:
- code points to start of search
- endcode points to where to stop
- utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
- cd contains pointers to tables etc.
-
-Returns: TRUE if what is matched could be empty
+ can match the empty string or not. It is called from could_be_empty()
+ below and from compile_branch() when checking for an unlimited repeat of a
+ group that can match nothing. Note that first_significant_code() skips over
+ backward and negative forward assertions when its final argument is TRUE. If we
+ hit an unclosed bracket, we return "empty" - this means we've struck an inner
+ bracket whose current branch will already have been scanned.
+
+ Arguments:
+ code points to start of search
+ endcode points to where to stop
+ utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
+ cd contains pointers to tables etc.
+ recurses chain of recurse_check to catch mutual recursion
+
+ Returns: TRUE if what is matched could be empty
*/
+
static BOOL
could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
- BOOL utf, compile_data *cd)
+ BOOL utf, compile_data *cd, recurse_check *recurses)
{
register pcre_uchar c;
+recurse_check this_recurse;
+
for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
code < endcode;
code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
@@ -2390,25 +2392,47 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
if (c == OP_RECURSE)
{
- const pcre_uchar *scode;
+ const pcre_uchar *scode = cd->start_code + GET(code, 1);
+ const pcre_uchar *endgroup = scode;
BOOL empty_branch;
- /* Test for forward reference */
+ /* Test for forward reference or uncompleted reference. This is disabled
+ when called to scan a completed pattern by setting cd->start_workspace to
+ NULL. */
- for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
- if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
+ if (cd->start_workspace != NULL)
+ {
+ const pcre_uchar *tcode;
+ for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
+ if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
+ if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
+ }
- /* Not a forward reference, test for completed backward reference */
+ /* If the reference is to a completed group, we need to detect whether this
+ is a recursive call, as otherwise there will be an infinite loop. If it is
+ a recursion, just skip over it. Simple recursions are easily detected. For
+ mutual recursions we keep a chain on the stack. */
- empty_branch = FALSE;
- scode = cd->start_code + GET(code, 1);
- if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
+ do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
+ if (code >= scode && code <= endgroup) continue; /* Simple recursion */
+ else
+ {
+ recurse_check *r = recurses;
+ for (r = recurses; r != NULL; r = r->prev)
+ if (r->group == scode) break;
+ if (r != NULL) continue; /* Mutual recursion */
+ }
+
+ /* Completed reference; scan the referenced group, remembering it on the
+ stack chain to detect mutual recursions. */
- /* Completed backwards reference */
+ empty_branch = FALSE;
+ this_recurse.prev = recurses;
+ this_recurse.group = scode;
do
{
- if (could_be_empty_branch(scode, endcode, utf, cd))
+ if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
{
empty_branch = TRUE;
break;
@@ -2448,7 +2472,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
if (c == OP_BRA || c == OP_BRAPOS ||
c == OP_CBRA || c == OP_CBRAPOS ||
c == OP_ONCE || c == OP_ONCE_NC ||
- c == OP_COND)
+ c == OP_COND || c == OP_SCOND)
{
BOOL empty_branch;
if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */
@@ -2464,8 +2488,8 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
empty_branch = FALSE;
do
{
- if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))
- empty_branch = TRUE;
+ if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,
+ recurses)) empty_branch = TRUE;
code += GET(code, 1);
}
while (*code == OP_ALT);
@@ -2522,34 +2546,57 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
/* Opcodes that must match a character */
+ case OP_ANY:
+ case OP_ALLANY:
+ case OP_ANYBYTE:
+
case OP_PROP:
case OP_NOTPROP:
+ case OP_ANYNL:
+
+ case OP_NOT_HSPACE:
+ case OP_HSPACE:
+ case OP_NOT_VSPACE:
+ case OP_VSPACE:
case OP_EXTUNI:
+
case OP_NOT_DIGIT:
case OP_DIGIT:
case OP_NOT_WHITESPACE:
case OP_WHITESPACE:
case OP_NOT_WORDCHAR:
case OP_WORDCHAR:
- case OP_ANY:
- case OP_ALLANY:
- case OP_ANYBYTE:
+
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
+
case OP_PLUS:
+ case OP_PLUSI:
case OP_MINPLUS:
- case OP_POSPLUS:
- case OP_EXACT:
+ case OP_MINPLUSI:
+
case OP_NOTPLUS:
+ case OP_NOTPLUSI:
case OP_NOTMINPLUS:
+ case OP_NOTMINPLUSI:
+
+ case OP_POSPLUS:
+ case OP_POSPLUSI:
case OP_NOTPOSPLUS:
+ case OP_NOTPOSPLUSI:
+
+ case OP_EXACT:
+ case OP_EXACTI:
case OP_NOTEXACT:
+ case OP_NOTEXACTI:
+
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
case OP_TYPEEXACT:
+
return FALSE;
/* These are going to continue, as they may be empty, but we have to
@@ -2583,30 +2630,58 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
return TRUE;
/* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
- MINUPTO, and POSUPTO may be followed by a multibyte character */
+ MINUPTO, and POSUPTO and their caseless and negative versions may be
+ followed by a multibyte character. */
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
case OP_STAR:
case OP_STARI:
+ case OP_NOTSTAR:
+ case OP_NOTSTARI:
+
case OP_MINSTAR:
case OP_MINSTARI:
+ case OP_NOTMINSTAR:
+ case OP_NOTMINSTARI:
+
case OP_POSSTAR:
case OP_POSSTARI:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSSTARI:
+
case OP_QUERY:
case OP_QUERYI:
+ case OP_NOTQUERY:
+ case OP_NOTQUERYI:
+
case OP_MINQUERY:
case OP_MINQUERYI:
+ case OP_NOTMINQUERY:
+ case OP_NOTMINQUERYI:
+
case OP_POSQUERY:
case OP_POSQUERYI:
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSQUERYI:
+
if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
break;
case OP_UPTO:
case OP_UPTOI:
+ case OP_NOTUPTO:
+ case OP_NOTUPTOI:
+
case OP_MINUPTO:
case OP_MINUPTOI:
+ case OP_NOTMINUPTO:
+ case OP_NOTMINUPTOI:
+
case OP_POSUPTO:
case OP_POSUPTOI:
+ case OP_NOTPOSUPTO:
+ case OP_NOTPOSUPTOI:
+
if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
break;
#endif
@@ -2632,7 +2707,6 @@ return TRUE;
}
-
/*************************************************
* Scan compiled regex for non-emptiness *
*************************************************/
@@ -2660,7 +2734,7 @@ could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
{
while (bcptr != NULL && bcptr->current_branch >= code)
{
- if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))
+ if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
return FALSE;
bcptr = bcptr->outer;
}
@@ -2668,7 +2742,6 @@ return TRUE;
}
-
/*************************************************
* Check for POSIX class syntax *
*************************************************/
@@ -5392,7 +5465,7 @@ for (;; ptr++)
pcre_uchar *scode = bracode;
do
{
- if (could_be_empty_branch(scode, ketcode, utf, cd))
+ if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
{
*bracode += OP_SBRA - OP_BRA;
break;
diff --git a/erts/emulator/pcre/pcre_internal.h b/erts/emulator/pcre/pcre_internal.h
index af436bd99b..eb0db89619 100644
--- a/erts/emulator/pcre/pcre_internal.h
+++ b/erts/emulator/pcre/pcre_internal.h
@@ -2455,6 +2455,13 @@ typedef struct branch_chain {
pcre_uchar *current_branch;
} branch_chain;
+/* Structure for mutual recursion detection. */
+
+typedef struct recurse_check {
+ struct recurse_check *prev;
+ const pcre_uchar *group;
+} recurse_check;
+
/* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre_exec(). */