Kamil Dudka d5245c
From 9618fb718b75920f37e5be2049ad1d0bb5c4a28c Mon Sep 17 00:00:00 2001
Kamil Dudka d5245c
From: Paul Eggert <eggert@cs.ucla.edu>
Kamil Dudka d5245c
Date: Tue, 26 Jan 2021 09:23:54 -0800
Kamil Dudka d5245c
Subject: [PATCH] expr: fix bug with unmatched \(...\)
Kamil Dudka d5245c
Kamil Dudka d5245c
Problem reported by Qiuhao Li.
Kamil Dudka d5245c
* doc/coreutils.texi (String expressions):
Kamil Dudka d5245c
Document the correct behavior, which POSIX requires.
Kamil Dudka d5245c
* src/expr.c (docolon): Treat unmatched \(...\) as empty.
Kamil Dudka d5245c
* tests/misc/expr.pl: New test.
Kamil Dudka d5245c
Kamil Dudka d5245c
Upstream-commit: 735083ba24878075235007b4417982ad5700436d
Kamil Dudka d5245c
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
Kamil Dudka d5245c
---
Kamil Dudka d5245c
 doc/coreutils.texi | 14 ++++++++------
Kamil Dudka d5245c
 src/expr.c         |  9 +++++++--
Kamil Dudka d5245c
 tests/misc/expr.pl |  3 +++
Kamil Dudka d5245c
 3 files changed, 18 insertions(+), 8 deletions(-)
Kamil Dudka d5245c
Kamil Dudka d5245c
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
Kamil Dudka d5245c
index 2382a16..5b2bb2c 100644
Kamil Dudka d5245c
--- a/doc/coreutils.texi
Kamil Dudka d5245c
+++ b/doc/coreutils.texi
Kamil Dudka d5245c
@@ -13529,12 +13529,14 @@ second is considered to be a (basic, a la GNU @code{grep}) regular
Kamil Dudka d5245c
 expression, with a @code{^} implicitly prepended.  The first argument is
Kamil Dudka d5245c
 then matched against this regular expression.
Kamil Dudka d5245c
 
Kamil Dudka d5245c
-If the match succeeds and @var{regex} uses @samp{\(} and @samp{\)}, the
Kamil Dudka d5245c
-@code{:} expression returns the part of @var{string} that matched the
Kamil Dudka d5245c
-subexpression; otherwise, it returns the number of characters matched.
Kamil Dudka d5245c
-
Kamil Dudka d5245c
-If the match fails, the @code{:} operator returns the null string if
Kamil Dudka d5245c
-@samp{\(} and @samp{\)} are used in @var{regex}, otherwise 0.
Kamil Dudka d5245c
+If @var{regex} does not use @samp{\(} and @samp{\)}, the @code{:}
Kamil Dudka d5245c
+expression returns the number of characters matched, or 0 if the match
Kamil Dudka d5245c
+fails.
Kamil Dudka d5245c
+
Kamil Dudka d5245c
+If @var{regex} uses @samp{\(} and @samp{\)}, the @code{:} expression
Kamil Dudka d5245c
+returns the part of @var{string} that matched the subexpression, or
Kamil Dudka d5245c
+the null string if the match fails or the subexpression does not
Kamil Dudka d5245c
+contribute to the match.
Kamil Dudka d5245c
 
Kamil Dudka d5245c
 @kindex \( @r{regexp operator}
Kamil Dudka d5245c
 Only the first @samp{\( @dots{} \)} pair is relevant to the return
Kamil Dudka d5245c
diff --git a/src/expr.c b/src/expr.c
Kamil Dudka d5245c
index e134872..0616a42 100644
Kamil Dudka d5245c
--- a/src/expr.c
Kamil Dudka d5245c
+++ b/src/expr.c
Kamil Dudka d5245c
@@ -721,8 +721,13 @@ docolon (VALUE *sv, VALUE *pv)
Kamil Dudka d5245c
       /* Were \(...\) used? */
Kamil Dudka d5245c
       if (re_buffer.re_nsub > 0)
Kamil Dudka d5245c
         {
Kamil Dudka d5245c
-          sv->u.s[re_regs.end[1]] = '\0';
Kamil Dudka d5245c
-          v = str_value (sv->u.s + re_regs.start[1]);
Kamil Dudka d5245c
+          if (re_regs.end[1] < 0)
Kamil Dudka d5245c
+            v = str_value ("");
Kamil Dudka d5245c
+          else
Kamil Dudka d5245c
+            {
Kamil Dudka d5245c
+              sv->u.s[re_regs.end[1]] = '\0';
Kamil Dudka d5245c
+              v = str_value (sv->u.s + re_regs.start[1]);
Kamil Dudka d5245c
+            }
Kamil Dudka d5245c
         }
Kamil Dudka d5245c
       else
Kamil Dudka d5245c
         {
Kamil Dudka d5245c
diff --git a/tests/misc/expr.pl b/tests/misc/expr.pl
Kamil Dudka d5245c
index e45f8e7..e57f79d 100755
Kamil Dudka d5245c
--- a/tests/misc/expr.pl
Kamil Dudka d5245c
+++ b/tests/misc/expr.pl
Kamil Dudka d5245c
@@ -84,6 +84,9 @@ my @Tests =
Kamil Dudka d5245c
      # In 5.94 and earlier, anchors incorrectly matched newlines.
Kamil Dudka d5245c
      ['anchor', "'a\nb' : 'a\$'", {OUT => '0'}, {EXIT => 1}],
Kamil Dudka d5245c
 
Kamil Dudka d5245c
+     # In 8.32, \( ... \) that did not match caused memory errors.
Kamil Dudka d5245c
+     ['emptysub', '"a" : "\\(b\\)*"', {OUT => ''}, {EXIT => 1}],
Kamil Dudka d5245c
+
Kamil Dudka d5245c
      # These tests are taken from grep/tests/bre.tests.
Kamil Dudka d5245c
      ['bre1', '"abc" : "a\\(b\\)c"', {OUT => 'b'}],
Kamil Dudka d5245c
      ['bre2', '"a(" : "a("', {OUT => '2'}],
Kamil Dudka d5245c
-- 
Kamil Dudka d5245c
2.26.2
Kamil Dudka d5245c