Blame otp-0007-Fix-CVE-2016-10253.patch

776ede
From: Zephyr Pellerin <zv@nxvr.org>
776ede
Date: Mon, 20 Mar 2017 15:36:41 -0700
776ede
Subject: [PATCH] Fix CVE-2016-10253
776ede
776ede
776ede
diff --git a/erts/emulator/pcre/pcre_compile.c b/erts/emulator/pcre/pcre_compile.c
57b0bc
index d48126a55d..15a81fae56 100644
776ede
--- a/erts/emulator/pcre/pcre_compile.c
776ede
+++ b/erts/emulator/pcre/pcre_compile.c
776ede
@@ -2335,34 +2335,36 @@ for (;;)
776ede
   }
776ede
 }
776ede
 
776ede
-
776ede
-
776ede
 /*************************************************
776ede
-*    Scan compiled branch for non-emptiness      *
776ede
-*************************************************/
776ede
+ *    Scan compiled branch for non-emptiness      *
776ede
+ *************************************************/
776ede
 
776ede
 /* This function scans through a branch of a compiled pattern to see whether it
776ede
-can match the empty string or not. It is called from could_be_empty()
776ede
-below and from compile_branch() when checking for an unlimited repeat of a
776ede
-group that can match nothing. Note that first_significant_code() skips over
776ede
-backward and negative forward assertions when its final argument is TRUE. If we
776ede
-hit an unclosed bracket, we return "empty" - this means we've struck an inner
776ede
-bracket whose current branch will already have been scanned.
776ede
-
776ede
-Arguments:
776ede
-  code        points to start of search
776ede
-  endcode     points to where to stop
776ede
-  utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
776ede
-  cd          contains pointers to tables etc.
776ede
-
776ede
-Returns:      TRUE if what is matched could be empty
776ede
+   can match the empty string or not. It is called from could_be_empty()
776ede
+   below and from compile_branch() when checking for an unlimited repeat of a
776ede
+   group that can match nothing. Note that first_significant_code() skips over
776ede
+   backward and negative forward assertions when its final argument is TRUE. If we
776ede
+   hit an unclosed bracket, we return "empty" - this means we've struck an inner
776ede
+   bracket whose current branch will already have been scanned.
776ede
+
776ede
+   Arguments:
776ede
+   code        points to start of search
776ede
+   endcode     points to where to stop
776ede
+   utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
776ede
+   cd          contains pointers to tables etc.
776ede
+   recurses    chain of recurse_check entries used to catch mutual recursion (NULL if none)
776ede
+
776ede
+   Returns:      TRUE if what is matched could be empty
776ede
 */
776ede
 
776ede
+
776ede
 static BOOL
776ede
 could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
776ede
-  BOOL utf, compile_data *cd)
776ede
+  BOOL utf, compile_data *cd, recurse_check *recurses)
776ede
 {
776ede
 register pcre_uchar c;
776ede
+recurse_check this_recurse;
776ede
+
776ede
 for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
776ede
      code < endcode;
776ede
      code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
776ede
@@ -2390,25 +2392,47 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
776ede
 
776ede
   if (c == OP_RECURSE)
776ede
     {
776ede
-    const pcre_uchar *scode;
776ede
+    const pcre_uchar *scode = cd->start_code + GET(code, 1);
776ede
+    const pcre_uchar *endgroup = scode;
776ede
     BOOL empty_branch;
776ede
 
776ede
-    /* Test for forward reference */
776ede
+    /* Test for forward reference or uncompleted reference. This is disabled
776ede
+    when called to scan a completed pattern by setting cd->start_workspace to
776ede
+    NULL. */
776ede
 
776ede
-    for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
776ede
-      if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
776ede
+    if (cd->start_workspace != NULL)
776ede
+      {
776ede
+      const pcre_uchar *tcode;
776ede
+      for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
776ede
+        if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
776ede
+      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
776ede
+      }
776ede
 
776ede
-    /* Not a forward reference, test for completed backward reference */
776ede
+    /* If the reference is to a completed group, we need to detect whether this
776ede
+    is a recursive call, as otherwise there will be an infinite loop. If it is
776ede
+    a recursion, just skip over it. Simple recursions are easily detected. For
776ede
+    mutual recursions we keep a chain on the stack. */
776ede
 
776ede
-    empty_branch = FALSE;
776ede
-    scode = cd->start_code + GET(code, 1);
776ede
-    if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
776ede
+    do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
776ede
+    if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
776ede
+    else
776ede
+      {
776ede
+      recurse_check *r = recurses;
776ede
+      for (r = recurses; r != NULL; r = r->prev)
776ede
+        if (r->group == scode) break;
776ede
+      if (r != NULL) continue;   /* Mutual recursion */
776ede
+      }
776ede
+
776ede
+    /* Completed reference; scan the referenced group, remembering it on the
776ede
+    stack chain to detect mutual recursions. */
776ede
 
776ede
-    /* Completed backwards reference */
776ede
+    empty_branch = FALSE;
776ede
+    this_recurse.prev = recurses;
776ede
+    this_recurse.group = scode;
776ede
 
776ede
     do
776ede
       {
776ede
-      if (could_be_empty_branch(scode, endcode, utf, cd))
776ede
+      if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
776ede
         {
776ede
         empty_branch = TRUE;
776ede
         break;
776ede
@@ -2448,7 +2472,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
776ede
   if (c == OP_BRA  || c == OP_BRAPOS ||
776ede
       c == OP_CBRA || c == OP_CBRAPOS ||
776ede
       c == OP_ONCE || c == OP_ONCE_NC ||
776ede
-      c == OP_COND)
776ede
+      c == OP_COND || c == OP_SCOND)
776ede
     {
776ede
     BOOL empty_branch;
776ede
     if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
776ede
@@ -2464,8 +2488,8 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
776ede
       empty_branch = FALSE;
776ede
       do
776ede
         {
776ede
-        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))
776ede
-          empty_branch = TRUE;
776ede
+        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,
776ede
+          recurses)) empty_branch = TRUE;
776ede
         code += GET(code, 1);
776ede
         }
776ede
       while (*code == OP_ALT);
776ede
@@ -2522,34 +2546,57 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
776ede
 
776ede
     /* Opcodes that must match a character */
776ede
 
776ede
+    case OP_ANY:
776ede
+    case OP_ALLANY:
776ede
+    case OP_ANYBYTE:
776ede
+
776ede
     case OP_PROP:
776ede
     case OP_NOTPROP:
776ede
+    case OP_ANYNL:
776ede
+
776ede
+    case OP_NOT_HSPACE:
776ede
+    case OP_HSPACE:
776ede
+    case OP_NOT_VSPACE:
776ede
+    case OP_VSPACE:
776ede
     case OP_EXTUNI:
776ede
+
776ede
     case OP_NOT_DIGIT:
776ede
     case OP_DIGIT:
776ede
     case OP_NOT_WHITESPACE:
776ede
     case OP_WHITESPACE:
776ede
     case OP_NOT_WORDCHAR:
776ede
     case OP_WORDCHAR:
776ede
-    case OP_ANY:
776ede
-    case OP_ALLANY:
776ede
-    case OP_ANYBYTE:
776ede
+
776ede
     case OP_CHAR:
776ede
     case OP_CHARI:
776ede
     case OP_NOT:
776ede
     case OP_NOTI:
776ede
+
776ede
     case OP_PLUS:
776ede
+    case OP_PLUSI:
776ede
     case OP_MINPLUS:
776ede
-    case OP_POSPLUS:
776ede
-    case OP_EXACT:
776ede
+    case OP_MINPLUSI:
776ede
+
776ede
     case OP_NOTPLUS:
776ede
+    case OP_NOTPLUSI:
776ede
     case OP_NOTMINPLUS:
776ede
+    case OP_NOTMINPLUSI:
776ede
+
776ede
+    case OP_POSPLUS:
776ede
+    case OP_POSPLUSI:
776ede
     case OP_NOTPOSPLUS:
776ede
+    case OP_NOTPOSPLUSI:
776ede
+
776ede
+    case OP_EXACT:
776ede
+    case OP_EXACTI:
776ede
     case OP_NOTEXACT:
776ede
+    case OP_NOTEXACTI:
776ede
+
776ede
     case OP_TYPEPLUS:
776ede
     case OP_TYPEMINPLUS:
776ede
     case OP_TYPEPOSPLUS:
776ede
     case OP_TYPEEXACT:
776ede
+
776ede
     return FALSE;
776ede
 
776ede
     /* These are going to continue, as they may be empty, but we have to
776ede
@@ -2583,30 +2630,58 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
776ede
     return TRUE;
776ede
 
776ede
     /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
776ede
-    MINUPTO, and POSUPTO may be followed by a multibyte character */
776ede
+    MINUPTO, and POSUPTO, and their caseless and negative versions, may be
776ede
+    followed by a multibyte character. */
776ede
 
776ede
 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
776ede
     case OP_STAR:
776ede
     case OP_STARI:
776ede
+    case OP_NOTSTAR:
776ede
+    case OP_NOTSTARI:
776ede
+
776ede
     case OP_MINSTAR:
776ede
     case OP_MINSTARI:
776ede
+    case OP_NOTMINSTAR:
776ede
+    case OP_NOTMINSTARI:
776ede
+
776ede
     case OP_POSSTAR:
776ede
     case OP_POSSTARI:
776ede
+    case OP_NOTPOSSTAR:
776ede
+    case OP_NOTPOSSTARI:
776ede
+
776ede
     case OP_QUERY:
776ede
     case OP_QUERYI:
776ede
+    case OP_NOTQUERY:
776ede
+    case OP_NOTQUERYI:
776ede
+
776ede
     case OP_MINQUERY:
776ede
     case OP_MINQUERYI:
776ede
+    case OP_NOTMINQUERY:
776ede
+    case OP_NOTMINQUERYI:
776ede
+
776ede
     case OP_POSQUERY:
776ede
     case OP_POSQUERYI:
776ede
+    case OP_NOTPOSQUERY:
776ede
+    case OP_NOTPOSQUERYI:
776ede
+
776ede
     if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
776ede
     break;
776ede
 
776ede
     case OP_UPTO:
776ede
     case OP_UPTOI:
776ede
+    case OP_NOTUPTO:
776ede
+    case OP_NOTUPTOI:
776ede
+
776ede
     case OP_MINUPTO:
776ede
     case OP_MINUPTOI:
776ede
+    case OP_NOTMINUPTO:
776ede
+    case OP_NOTMINUPTOI:
776ede
+
776ede
     case OP_POSUPTO:
776ede
     case OP_POSUPTOI:
776ede
+    case OP_NOTPOSUPTO:
776ede
+    case OP_NOTPOSUPTOI:
776ede
+
776ede
     if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
776ede
     break;
776ede
 #endif
776ede
@@ -2632,7 +2707,6 @@ return TRUE;
776ede
 }
776ede
 
776ede
 
776ede
-
776ede
 /*************************************************
776ede
 *    Scan compiled regex for non-emptiness       *
776ede
 *************************************************/
776ede
@@ -2660,7 +2734,7 @@ could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
776ede
 {
776ede
 while (bcptr != NULL && bcptr->current_branch >= code)
776ede
   {
776ede
-  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))
776ede
+  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
776ede
     return FALSE;
776ede
   bcptr = bcptr->outer;
776ede
   }
776ede
@@ -2668,7 +2742,6 @@ return TRUE;
776ede
 }
776ede
 
776ede
 
776ede
-
776ede
 /*************************************************
776ede
 *           Check for POSIX class syntax         *
776ede
 *************************************************/
776ede
@@ -5392,7 +5465,7 @@ for (;; ptr++)
776ede
             pcre_uchar *scode = bracode;
776ede
             do
776ede
               {
776ede
-              if (could_be_empty_branch(scode, ketcode, utf, cd))
776ede
+              if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
776ede
                 {
776ede
                 *bracode += OP_SBRA - OP_BRA;
776ede
                 break;
776ede
diff --git a/erts/emulator/pcre/pcre_internal.h b/erts/emulator/pcre/pcre_internal.h
57b0bc
index af436bd99b..eb0db89619 100644
776ede
--- a/erts/emulator/pcre/pcre_internal.h
776ede
+++ b/erts/emulator/pcre/pcre_internal.h
776ede
@@ -2455,6 +2455,13 @@ typedef struct branch_chain {
776ede
   pcre_uchar *current_branch;
776ede
 } branch_chain;
776ede
 
776ede
+/* Structure for mutual recursion detection: a chain of the groups being scanned. */
776ede
+
776ede
+typedef struct recurse_check {
776ede
+    struct recurse_check *prev;
776ede
+    const pcre_uchar *group;
776ede
+} recurse_check;
776ede
+
776ede
 /* Structure for items in a linked list that represents an explicit recursive
776ede
 call within the pattern; used by pcre_exec(). */
776ede