From: Zephyr Pellerin Date: Mon, 20 Mar 2017 15:36:41 -0700 Subject: [PATCH] Fix CVE-2016-10253 diff --git a/erts/emulator/pcre/pcre_compile.c b/erts/emulator/pcre/pcre_compile.c index d48126a55d..15a81fae56 100644 --- a/erts/emulator/pcre/pcre_compile.c +++ b/erts/emulator/pcre/pcre_compile.c @@ -2335,34 +2335,36 @@ for (;;) } } - - /************************************************* -* Scan compiled branch for non-emptiness * -*************************************************/ + * Scan compiled branch for non-emptiness * + *************************************************/ /* This function scans through a branch of a compiled pattern to see whether it -can match the empty string or not. It is called from could_be_empty() -below and from compile_branch() when checking for an unlimited repeat of a -group that can match nothing. Note that first_significant_code() skips over -backward and negative forward assertions when its final argument is TRUE. If we -hit an unclosed bracket, we return "empty" - this means we've struck an inner -bracket whose current branch will already have been scanned. - -Arguments: - code points to start of search - endcode points to where to stop - utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode - cd contains pointers to tables etc. - -Returns: TRUE if what is matched could be empty + can match the empty string or not. It is called from could_be_empty() + below and from compile_branch() when checking for an unlimited repeat of a + group that can match nothing. Note that first_significant_code() skips over + backward and negative forward assertions when its final argument is TRUE. If we + hit an unclosed bracket, we return "empty" - this means we've struck an inner + bracket whose current branch will already have been scanned. + + Arguments: + code points to start of search + endcode points to where to stop + utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode + cd contains pointers to tables etc. + recurses chain of recurse_check to catch mutual recursion + + Returns: TRUE if what is matched could be empty */ + static BOOL could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode, - BOOL utf, compile_data *cd) + BOOL utf, compile_data *cd, recurse_check *recurses) { register pcre_uchar c; +recurse_check this_recurse; + for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); code < endcode; code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE)) @@ -2390,25 +2392,47 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); if (c == OP_RECURSE) { - const pcre_uchar *scode; + const pcre_uchar *scode = cd->start_code + GET(code, 1); + const pcre_uchar *endgroup = scode; BOOL empty_branch; - /* Test for forward reference */ + /* Test for forward reference or uncompleted reference. This is disabled + when called to scan a completed pattern by setting cd->start_workspace to + NULL. */ - for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE) - if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE; + if (cd->start_workspace != NULL) + { + const pcre_uchar *tcode; + for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE) + if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE; + if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ + } - /* Not a forward reference, test for completed backward reference */ + /* If the reference is to a completed group, we need to detect whether this + is a recursive call, as otherwise there will be an infinite loop. If it is + a recursion, just skip over it. Simple recursions are easily detected. For + mutual recursions we keep a chain on the stack. */ - empty_branch = FALSE; - scode = cd->start_code + GET(code, 1); - if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ + do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT); + if (code >= scode && code <= endgroup) continue; /* Simple recursion */ + else + { + recurse_check *r = recurses; + for (r = recurses; r != NULL; r = r->prev) + if (r->group == scode) break; + if (r != NULL) continue; /* Mutual recursion */ + } + + /* Completed reference; scan the referenced group, remembering it on the + stack chain to detect mutual recursions. */ - /* Completed backwards reference */ + empty_branch = FALSE; + this_recurse.prev = recurses; + this_recurse.group = scode; do { - if (could_be_empty_branch(scode, endcode, utf, cd)) + if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse)) { empty_branch = TRUE; break; @@ -2448,7 +2472,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); if (c == OP_BRA || c == OP_BRAPOS || c == OP_CBRA || c == OP_CBRAPOS || c == OP_ONCE || c == OP_ONCE_NC || - c == OP_COND) + c == OP_COND || c == OP_SCOND) { BOOL empty_branch; if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */ @@ -2464,8 +2488,8 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); empty_branch = FALSE; do { - if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd)) - empty_branch = TRUE; + if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, + recurses)) empty_branch = TRUE; code += GET(code, 1); } while (*code == OP_ALT); @@ -2522,34 +2546,57 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); /* Opcodes that must match a character */ + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_PROP: case OP_NOTPROP: + case OP_ANYNL: + + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: case OP_EXTUNI: + case OP_NOT_DIGIT: case OP_DIGIT: case OP_NOT_WHITESPACE: case OP_WHITESPACE: case OP_NOT_WORDCHAR: case OP_WORDCHAR: - case OP_ANY: - case OP_ALLANY: - case OP_ANYBYTE: + case OP_CHAR: case OP_CHARI: case OP_NOT: case OP_NOTI: + case OP_PLUS: + case OP_PLUSI: case OP_MINPLUS: - case OP_POSPLUS: - case OP_EXACT: + case OP_MINPLUSI: + case OP_NOTPLUS: + case OP_NOTPLUSI: case OP_NOTMINPLUS: + case OP_NOTMINPLUSI: + + case OP_POSPLUS: + case OP_POSPLUSI: case OP_NOTPOSPLUS: + case OP_NOTPOSPLUSI: + + case OP_EXACT: + case OP_EXACTI: case OP_NOTEXACT: + case OP_NOTEXACTI: + case OP_TYPEPLUS: case OP_TYPEMINPLUS: case OP_TYPEPOSPLUS: case OP_TYPEEXACT: + return FALSE; /* These are going to continue, as they may be empty, but we have to @@ -2583,30 +2630,58 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); return TRUE; /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO, - MINUPTO, and POSUPTO may be followed by a multibyte character */ + MINUPTO, and POSUPTO and their caseless and negative versions may be + followed by a multibyte character. */ #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 case OP_STAR: case OP_STARI: + case OP_NOTSTAR: + case OP_NOTSTARI: + case OP_MINSTAR: case OP_MINSTARI: + case OP_NOTMINSTAR: + case OP_NOTMINSTARI: + case OP_POSSTAR: case OP_POSSTARI: + case OP_NOTPOSSTAR: + case OP_NOTPOSSTARI: + case OP_QUERY: case OP_QUERYI: + case OP_NOTQUERY: + case OP_NOTQUERYI: + case OP_MINQUERY: case OP_MINQUERYI: + case OP_NOTMINQUERY: + case OP_NOTMINQUERYI: + case OP_POSQUERY: case OP_POSQUERYI: + case OP_NOTPOSQUERY: + case OP_NOTPOSQUERYI: + if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]); break; case OP_UPTO: case OP_UPTOI: + case OP_NOTUPTO: + case OP_NOTUPTOI: + case OP_MINUPTO: case OP_MINUPTOI: + case OP_NOTMINUPTO: + case OP_NOTMINUPTOI: + case OP_POSUPTO: case OP_POSUPTOI: + case OP_NOTPOSUPTO: + case OP_NOTPOSUPTOI: + if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]); break; #endif @@ -2632,7 +2707,6 @@ return TRUE; } - /************************************************* * Scan compiled regex for non-emptiness * *************************************************/ @@ -2660,7 +2734,7 @@ could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode, { while (bcptr != NULL && bcptr->current_branch >= code) { - if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd)) + if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL)) return FALSE; bcptr = bcptr->outer; } @@ -2668,7 +2742,6 @@ return TRUE; } - /************************************************* * Check for POSIX class syntax * *************************************************/ @@ -5392,7 +5465,7 @@ for (;; ptr++) pcre_uchar *scode = bracode; do { - if (could_be_empty_branch(scode, ketcode, utf, cd)) + if (could_be_empty_branch(scode, ketcode, utf, cd, NULL)) { *bracode += OP_SBRA - OP_BRA; break; diff --git a/erts/emulator/pcre/pcre_internal.h b/erts/emulator/pcre/pcre_internal.h index af436bd99b..eb0db89619 100644 --- a/erts/emulator/pcre/pcre_internal.h +++ b/erts/emulator/pcre/pcre_internal.h @@ -2455,6 +2455,13 @@ typedef struct branch_chain { pcre_uchar *current_branch; } branch_chain; +/* Structure for mutual recursion detection. */ + +typedef struct recurse_check { + struct recurse_check *prev; + const pcre_uchar *group; +} recurse_check; + /* Structure for items in a linked list that represents an explicit recursive call within the pattern; used by pcre_exec(). */