Kamil Dudka 8b01f2
From c479153d77b419a6cae4551b63d2b73096c1130e Mon Sep 17 00:00:00 2001
Kamil Dudka 8b01f2
From: Kamil Dudka <kdudka@redhat.com>
Kamil Dudka 8b01f2
Date: Mon, 18 Jul 2016 19:04:43 +0200
Kamil Dudka 8b01f2
Subject: [PATCH 1/3] maint: sort.c: deduplicate code for traversing numbers
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
* src/sort.c (traverse_raw_number): New function for traversing numbers.
Kamil Dudka 8b01f2
(find_unit_order): Use traverse_raw_number() instead of open-coding it.
Kamil Dudka 8b01f2
(debug_key): Likewise.
Kamil Dudka 8b01f2
---
Kamil Dudka 8b01f2
 src/sort.c | 63 ++++++++++++++++++++++++++++++++++----------------------------
Kamil Dudka 8b01f2
 1 file changed, 35 insertions(+), 28 deletions(-)
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
diff --git a/src/sort.c b/src/sort.c
Kamil Dudka 8b01f2
index 5b02343..e28bb6c 100644
Kamil Dudka 8b01f2
--- a/src/sort.c
Kamil Dudka 8b01f2
+++ b/src/sort.c
Kamil Dudka 8b01f2
@@ -2231,18 +2231,16 @@ static char const unit_order[UCHAR_LIM] =
Kamil Dudka 8b01f2
 #endif
Kamil Dudka 8b01f2
   };
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
-/* Return an integer that represents the order of magnitude of the
Kamil Dudka 8b01f2
-   unit following the number.  The number may contain thousands
Kamil Dudka 8b01f2
-   separators and a decimal point, but it may not contain leading blanks.
Kamil Dudka 8b01f2
-   Negative numbers get negative orders; zero numbers have a zero order.  */
Kamil Dudka 8b01f2
-
Kamil Dudka 8b01f2
-static int _GL_ATTRIBUTE_PURE
Kamil Dudka 8b01f2
-find_unit_order (char const *number)
Kamil Dudka 8b01f2
+/* Traverse number given as *number consisting of digits, thousands_sep, and
Kamil Dudka 8b01f2
+   decimal_point chars only.  Returns the highest digit found in the number,
Kamil Dudka 8b01f2
+   or '\0' if no digit has been found.  Upon return *number points at the
Kamil Dudka 8b01f2
+   character that immediately follows after the given number.  */
Kamil Dudka 8b01f2
+static unsigned char
Kamil Dudka 8b01f2
+traverse_raw_number (char const **number)
Kamil Dudka 8b01f2
 {
Kamil Dudka 8b01f2
-  bool minus_sign = (*number == '-');
Kamil Dudka 8b01f2
-  char const *p = number + minus_sign;
Kamil Dudka 8b01f2
-  int nonzero = 0;
Kamil Dudka 8b01f2
+  char const *p = *number;
Kamil Dudka 8b01f2
   unsigned char ch;
Kamil Dudka 8b01f2
+  unsigned char max_digit = '\0';
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
   /* Scan to end of number.
Kamil Dudka 8b01f2
      Decimals or separators not followed by digits stop the scan.
Kamil Dudka 8b01f2
@@ -2253,16 +2251,34 @@ find_unit_order (char const *number)
Kamil Dudka 8b01f2
   do
Kamil Dudka 8b01f2
     {
Kamil Dudka 8b01f2
       while (ISDIGIT (ch = *p++))
Kamil Dudka 8b01f2
-        nonzero |= ch - '0';
Kamil Dudka 8b01f2
+        if (max_digit < ch)
Kamil Dudka 8b01f2
+          max_digit = ch;
Kamil Dudka 8b01f2
     }
Kamil Dudka 8b01f2
   while (ch == thousands_sep);
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
   if (ch == decimal_point)
Kamil Dudka 8b01f2
     while (ISDIGIT (ch = *p++))
Kamil Dudka 8b01f2
-      nonzero |= ch - '0';
Kamil Dudka 8b01f2
+      if (max_digit < ch)
Kamil Dudka 8b01f2
+        max_digit = ch;
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+  *number = p - 1;
Kamil Dudka 8b01f2
+  return max_digit;
Kamil Dudka 8b01f2
+}
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+/* Return an integer that represents the order of magnitude of the
Kamil Dudka 8b01f2
+   unit following the number.  The number may contain thousands
Kamil Dudka 8b01f2
+   separators and a decimal point, but it may not contain leading blanks.
Kamil Dudka 8b01f2
+   Negative numbers get negative orders; zero numbers have a zero order.  */
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
-  if (nonzero)
Kamil Dudka 8b01f2
+static int _GL_ATTRIBUTE_PURE
Kamil Dudka 8b01f2
+find_unit_order (char const *number)
Kamil Dudka 8b01f2
+{
Kamil Dudka 8b01f2
+  bool minus_sign = (*number == '-');
Kamil Dudka 8b01f2
+  char const *p = number + minus_sign;
Kamil Dudka 8b01f2
+  unsigned char max_digit = traverse_raw_number (&p);
Kamil Dudka 8b01f2
+  if ('0' < max_digit)
Kamil Dudka 8b01f2
     {
Kamil Dudka 8b01f2
+      unsigned char ch = *p;
Kamil Dudka 8b01f2
       int order = unit_order[ch];
Kamil Dudka 8b01f2
       return (minus_sign ? -order : order);
Kamil Dudka 8b01f2
     }
Kamil Dudka 8b01f2
@@ -2655,23 +2671,14 @@ debug_key (struct line const *line, struct keyfield const *key)
Kamil Dudka 8b01f2
             ignore_value (strtold (beg, &tighter_lim));
Kamil Dudka 8b01f2
           else if (key->numeric || key->human_numeric)
Kamil Dudka 8b01f2
             {
Kamil Dudka 8b01f2
-              char *p = beg + (beg < lim && *beg == '-');
Kamil Dudka 8b01f2
-              bool found_digit = false;
Kamil Dudka 8b01f2
-              unsigned char ch;
Kamil Dudka 8b01f2
-
Kamil Dudka 8b01f2
-              do
Kamil Dudka 8b01f2
+              char const *p = beg + (beg < lim && *beg == '-');
Kamil Dudka 8b01f2
+              unsigned char max_digit = traverse_raw_number (&p);
Kamil Dudka 8b01f2
+              if ('0' <= max_digit)
Kamil Dudka 8b01f2
                 {
Kamil Dudka 8b01f2
-                  while (ISDIGIT (ch = *p++))
Kamil Dudka 8b01f2
-                    found_digit = true;
Kamil Dudka 8b01f2
+                  unsigned char ch = *p;
Kamil Dudka 8b01f2
+                  tighter_lim = (char *) p
Kamil Dudka 8b01f2
+                    + (key->human_numeric && unit_order[ch]);
Kamil Dudka 8b01f2
                 }
Kamil Dudka 8b01f2
-              while (ch == thousands_sep);
Kamil Dudka 8b01f2
-
Kamil Dudka 8b01f2
-              if (ch == decimal_point)
Kamil Dudka 8b01f2
-                while (ISDIGIT (ch = *p++))
Kamil Dudka 8b01f2
-                  found_digit = true;
Kamil Dudka 8b01f2
-
Kamil Dudka 8b01f2
-              if (found_digit)
Kamil Dudka 8b01f2
-                tighter_lim = p - ! (key->human_numeric && unit_order[ch]);
Kamil Dudka 8b01f2
             }
Kamil Dudka 8b01f2
           else
Kamil Dudka 8b01f2
             tighter_lim = lim;
Kamil Dudka 8b01f2
-- 
Kamil Dudka 8b01f2
2.5.5
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
From 8c39465a5b0343ff7a21286dd69ed5430685d2f7 Mon Sep 17 00:00:00 2001
Kamil Dudka 8b01f2
From: Kamil Dudka <kdudka@redhat.com>
Kamil Dudka 8b01f2
Date: Mon, 18 Jul 2016 19:04:44 +0200
Kamil Dudka 8b01f2
Subject: [PATCH 2/3] sort: make -h work with -k and blank used as thousands
Kamil Dudka 8b01f2
 separator
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
* src/sort.c (traverse_raw_number): Permit skipping only one occurrence
Kamil Dudka 8b01f2
of thousands_sep to avoid finding the unit in the next column in case
Kamil Dudka 8b01f2
thousands_sep is a blank and is also used as the column delimiter.
Kamil Dudka 8b01f2
* tests/misc/sort-h-thousands-sep.sh: Add regression test for this bug.
Kamil Dudka 8b01f2
* tests/local.mk: Reference the test.
Kamil Dudka 8b01f2
* NEWS: Mention the bug fix.
Kamil Dudka 8b01f2
Reported at https://bugzilla.redhat.com/1355780
Kamil Dudka 8b01f2
Fixes http://bugs.gnu.org/24015
Kamil Dudka 8b01f2
---
Kamil Dudka 8b01f2
 src/sort.c                         | 14 ++++++++----
Kamil Dudka 8b01f2
 tests/local.mk                     |  1 +
Kamil Dudka 8b01f2
 tests/misc/sort-h-thousands-sep.sh | 47 ++++++++++++++++++++++++++++++++++++++
Kamil Dudka 8b01f2
 3 files changed, 57 insertions(+), 5 deletions(-)
Kamil Dudka 8b01f2
 create mode 100755 tests/misc/sort-h-thousands-sep.sh
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
diff --git a/src/sort.c b/src/sort.c
Kamil Dudka 8b01f2
index e28bb6c..dd3ba58 100644
Kamil Dudka 8b01f2
--- a/src/sort.c
Kamil Dudka 8b01f2
+++ b/src/sort.c
Kamil Dudka 8b01f2
@@ -2248,13 +2248,17 @@ traverse_raw_number (char const **number)
Kamil Dudka 8b01f2
      to be lacking in units.
Kamil Dudka 8b01f2
      FIXME: add support for multibyte thousands_sep and decimal_point.  */
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
-  do
Kamil Dudka 8b01f2
+  while (ISDIGIT (ch = *p++))
Kamil Dudka 8b01f2
     {
Kamil Dudka 8b01f2
-      while (ISDIGIT (ch = *p++))
Kamil Dudka 8b01f2
-        if (max_digit < ch)
Kamil Dudka 8b01f2
-          max_digit = ch;
Kamil Dudka 8b01f2
+      if (max_digit < ch)
Kamil Dudka 8b01f2
+        max_digit = ch;
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+      /* Allow to skip only one occurrence of thousands_sep to avoid finding
Kamil Dudka 8b01f2
+         the unit in the next column in case thousands_sep matches as blank
Kamil Dudka 8b01f2
+         and is used as column delimiter.  */
Kamil Dudka 8b01f2
+      if (*p == thousands_sep)
Kamil Dudka 8b01f2
+        ++p;
Kamil Dudka 8b01f2
     }
Kamil Dudka 8b01f2
-  while (ch == thousands_sep);
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
   if (ch == decimal_point)
Kamil Dudka 8b01f2
     while (ISDIGIT (ch = *p++))
Kamil Dudka 8b01f2
diff --git a/tests/local.mk b/tests/local.mk
Kamil Dudka 8b01f2
index 42d39f2..dccff8d 100644
Kamil Dudka 8b01f2
--- a/tests/local.mk
Kamil Dudka 8b01f2
+++ b/tests/local.mk
Kamil Dudka 8b01f2
@@ -344,6 +344,7 @@ all_tests =					\
Kamil Dudka 8b01f2
   tests/misc/sort-discrim.sh			\
Kamil Dudka 8b01f2
   tests/misc/sort-files0-from.pl		\
Kamil Dudka 8b01f2
   tests/misc/sort-float.sh			\
Kamil Dudka 8b01f2
+  tests/misc/sort-h-thousands-sep.sh		\
Kamil Dudka 8b01f2
   tests/misc/sort-mb-tests.sh			\
Kamil Dudka 8b01f2
   tests/i18n/sort.sh				\
Kamil Dudka 8b01f2
   tests/misc/sort-merge.pl			\
Kamil Dudka 8b01f2
diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh
Kamil Dudka 8b01f2
new file mode 100755
Kamil Dudka 8b01f2
index 0000000..17f1b6c
Kamil Dudka 8b01f2
--- /dev/null
Kamil Dudka 8b01f2
+++ b/tests/misc/sort-h-thousands-sep.sh
Kamil Dudka 8b01f2
@@ -0,0 +1,47 @@
Kamil Dudka 8b01f2
+#!/bin/sh
Kamil Dudka 8b01f2
+# exercise 'sort -h' in locales where thousands separator is blank
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+# Copyright (C) 2016 Free Software Foundation, Inc.
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+# This program is free software: you can redistribute it and/or modify
Kamil Dudka 8b01f2
+# it under the terms of the GNU General Public License as published by
Kamil Dudka 8b01f2
+# the Free Software Foundation, either version 3 of the License, or
Kamil Dudka 8b01f2
+# (at your option) any later version.
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+# This program is distributed in the hope that it will be useful,
Kamil Dudka 8b01f2
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
Kamil Dudka 8b01f2
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Kamil Dudka 8b01f2
+# GNU General Public License for more details.
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+# You should have received a copy of the GNU General Public License
Kamil Dudka 8b01f2
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
Kamil Dudka 8b01f2
+print_ver_ sort
Kamil Dudka 8b01f2
+test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \
Kamil Dudka 8b01f2
+  || skip_ 'The Swedish locale with blank thousands separator is unavailable.'
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+tee exp1 > in << _EOF_
Kamil Dudka 8b01f2
+1       1k      4 003   1M
Kamil Dudka 8b01f2
+2k      2M      4 002   2
Kamil Dudka 8b01f2
+3M      3       4 001   3k
Kamil Dudka 8b01f2
+_EOF_
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+cat > exp2 << _EOF_
Kamil Dudka 8b01f2
+3M      3       4 001   3k
Kamil Dudka 8b01f2
+1       1k      4 003   1M
Kamil Dudka 8b01f2
+2k      2M      4 002   2
Kamil Dudka 8b01f2
+_EOF_
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+cat > exp3 << _EOF_
Kamil Dudka 8b01f2
+3M      3       4 001   3k
Kamil Dudka 8b01f2
+2k      2M      4 002   2
Kamil Dudka 8b01f2
+1       1k      4 003   1M
Kamil Dudka 8b01f2
+_EOF_
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+for i in 1 2 3; do
Kamil Dudka 8b01f2
+  LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1
Kamil Dudka 8b01f2
+  compare "exp${i}" "out${i}" || fail=1
Kamil Dudka 8b01f2
+done
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
+Exit $fail
Kamil Dudka 8b01f2
-- 
Kamil Dudka 8b01f2
2.5.5
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
From 46ef53f558e7bc1c0bc0abd62a86b40b4141e058 Mon Sep 17 00:00:00 2001
Kamil Dudka 8b01f2
From: Kamil Dudka <kdudka@redhat.com>
Kamil Dudka 8b01f2
Date: Mon, 18 Jul 2016 19:04:45 +0200
Kamil Dudka 8b01f2
Subject: [PATCH 3/3] sort: with -h, disallow thousands separator between
Kamil Dudka 8b01f2
 number and unit
Kamil Dudka 8b01f2
MIME-Version: 1.0
Kamil Dudka 8b01f2
Content-Type: text/plain; charset=UTF-8
Kamil Dudka 8b01f2
Content-Transfer-Encoding: 8bit
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
* src/sort.c (traverse_raw_number): Accept thousands separator only
Kamil Dudka 8b01f2
if it is immediately followed by a digit.
Kamil Dudka 8b01f2
* tests/misc/sort-h-thousands-sep.sh: Cover the fix for this bug.
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
Suggested by Pádraig Brady in http://bugs.gnu.org/24015
Kamil Dudka 8b01f2
---
Kamil Dudka 8b01f2
 src/sort.c                         | 11 ++++++++++-
Kamil Dudka 8b01f2
 tests/misc/sort-h-thousands-sep.sh | 25 +++++++++++++------------
Kamil Dudka 8b01f2
 2 files changed, 23 insertions(+), 13 deletions(-)
Kamil Dudka 8b01f2
Kamil Dudka 8b01f2
diff --git a/src/sort.c b/src/sort.c
Kamil Dudka 8b01f2
index dd3ba58..69ef75f 100644
Kamil Dudka 8b01f2
--- a/src/sort.c
Kamil Dudka 8b01f2
+++ b/src/sort.c
Kamil Dudka 8b01f2
@@ -2241,6 +2241,7 @@ traverse_raw_number (char const **number)
Kamil Dudka 8b01f2
   char const *p = *number;
Kamil Dudka 8b01f2
   unsigned char ch;
Kamil Dudka 8b01f2
   unsigned char max_digit = '\0';
Kamil Dudka 8b01f2
+  bool ends_with_thousands_sep = false;
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
   /* Scan to end of number.
Kamil Dudka 8b01f2
      Decimals or separators not followed by digits stop the scan.
Kamil Dudka 8b01f2
@@ -2256,10 +2257,18 @@ traverse_raw_number (char const **number)
Kamil Dudka 8b01f2
       /* Allow to skip only one occurrence of thousands_sep to avoid finding
Kamil Dudka 8b01f2
          the unit in the next column in case thousands_sep matches as blank
Kamil Dudka 8b01f2
          and is used as column delimiter.  */
Kamil Dudka 8b01f2
-      if (*p == thousands_sep)
Kamil Dudka 8b01f2
+      ends_with_thousands_sep = (*p == thousands_sep);
Kamil Dudka 8b01f2
+      if (ends_with_thousands_sep)
Kamil Dudka 8b01f2
         ++p;
Kamil Dudka 8b01f2
     }
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
+  if (ends_with_thousands_sep)
Kamil Dudka 8b01f2
+    {
Kamil Dudka 8b01f2
+      /* thousands_sep not followed by digit is not allowed.  */
Kamil Dudka 8b01f2
+      *number = p - 2;
Kamil Dudka 8b01f2
+      return max_digit;
Kamil Dudka 8b01f2
+    }
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
   if (ch == decimal_point)
Kamil Dudka 8b01f2
     while (ISDIGIT (ch = *p++))
Kamil Dudka 8b01f2
       if (max_digit < ch)
Kamil Dudka 8b01f2
diff --git a/tests/misc/sort-h-thousands-sep.sh b/tests/misc/sort-h-thousands-sep.sh
Kamil Dudka 8b01f2
index 17f1b6c..3ffa89e 100755
Kamil Dudka 8b01f2
--- a/tests/misc/sort-h-thousands-sep.sh
Kamil Dudka 8b01f2
+++ b/tests/misc/sort-h-thousands-sep.sh
Kamil Dudka 8b01f2
@@ -18,28 +18,29 @@
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
 . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
Kamil Dudka 8b01f2
 print_ver_ sort
Kamil Dudka 8b01f2
+
Kamil Dudka 8b01f2
 test "$(LC_ALL=sv_SE locale thousands_sep)" = ' ' \
Kamil Dudka 8b01f2
   || skip_ 'The Swedish locale with blank thousands separator is unavailable.'
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
-tee exp1 > in << _EOF_
Kamil Dudka 8b01f2
-1       1k      4 003   1M
Kamil Dudka 8b01f2
-2k      2M      4 002   2
Kamil Dudka 8b01f2
-3M      3       4 001   3k
Kamil Dudka 8b01f2
+tee exp1 exp3 > in << _EOF_
Kamil Dudka 8b01f2
+1       1k      1 M     4 003   1M
Kamil Dudka 8b01f2
+2k      2M      2 k     4 002   2
Kamil Dudka 8b01f2
+3M      3       3 G     4 001   3k
Kamil Dudka 8b01f2
 _EOF_
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
 cat > exp2 << _EOF_
Kamil Dudka 8b01f2
-3M      3       4 001   3k
Kamil Dudka 8b01f2
-1       1k      4 003   1M
Kamil Dudka 8b01f2
-2k      2M      4 002   2
Kamil Dudka 8b01f2
+3M      3       3 G     4 001   3k
Kamil Dudka 8b01f2
+1       1k      1 M     4 003   1M
Kamil Dudka 8b01f2
+2k      2M      2 k     4 002   2
Kamil Dudka 8b01f2
 _EOF_
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
-cat > exp3 << _EOF_
Kamil Dudka 8b01f2
-3M      3       4 001   3k
Kamil Dudka 8b01f2
-2k      2M      4 002   2
Kamil Dudka 8b01f2
-1       1k      4 003   1M
Kamil Dudka 8b01f2
+cat > exp5 << _EOF_
Kamil Dudka 8b01f2
+3M      3       3 G     4 001   3k
Kamil Dudka 8b01f2
+2k      2M      2 k     4 002   2
Kamil Dudka 8b01f2
+1       1k      1 M     4 003   1M
Kamil Dudka 8b01f2
 _EOF_
Kamil Dudka 8b01f2
 
Kamil Dudka 8b01f2
-for i in 1 2 3; do
Kamil Dudka 8b01f2
+for i in 1 2 3 5; do
Kamil Dudka 8b01f2
   LC_ALL="sv_SE.utf8" sort -h -k $i "in" > "out${i}" || fail=1
Kamil Dudka 8b01f2
   compare "exp${i}" "out${i}" || fail=1
Kamil Dudka 8b01f2
 done
Kamil Dudka 8b01f2
-- 
Kamil Dudka 8b01f2
2.5.5
Kamil Dudka 8b01f2