13d99c
--- a/src/dfa.c
13d99c
+++ b/src/dfa.c
13d99c
@@ -1238,6 +1238,20 @@ parse_bracket_exp (void)
13d99c
   return CSET + charclass_index (ccl);
13d99c
 }
13d99c
 
13d99c
+#define PUSH_LEX_STATE(s)			\
13d99c
+  do						\
13d99c
+    {						\
13d99c
+      char const *lexptr_saved = lexptr;	\
13d99c
+      size_t lexleft_saved = lexleft;		\
13d99c
+      lexptr = (s);				\
13d99c
+      lexleft = strlen (lexptr)
13d99c
+
13d99c
+#define POP_LEX_STATE()				\
13d99c
+      lexptr = lexptr_saved;			\
13d99c
+      lexleft = lexleft_saved;			\
13d99c
+    }						\
13d99c
+  while (0)
13d99c
+
13d99c
 static token
13d99c
 lex (void)
13d99c
 {
13d99c
@@ -1485,20 +1499,6 @@ lex (void)
13d99c
               return lasttok = CSET + charclass_index (ccl);
13d99c
             }
13d99c
 
13d99c
-#define PUSH_LEX_STATE(s)			\
13d99c
-  do						\
13d99c
-    {						\
13d99c
-      char const *lexptr_saved = lexptr;	\
13d99c
-      size_t lexleft_saved = lexleft;		\
13d99c
-      lexptr = (s);				\
13d99c
-      lexleft = strlen (lexptr)
13d99c
-
13d99c
-#define POP_LEX_STATE()				\
13d99c
-      lexptr = lexptr_saved;			\
13d99c
-      lexleft = lexleft_saved;			\
13d99c
-    }						\
13d99c
-  while (0)
13d99c
-
13d99c
           /* FIXME: see if optimizing this, as is done with ANYCHAR and
13d99c
              add_utf8_anychar, makes sense.  */
13d99c
 
13d99c
@@ -1518,14 +1518,33 @@ lex (void)
13d99c
         case 'W':
13d99c
           if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
13d99c
             goto normal_char;
13d99c
-          zeroset (ccl);
13d99c
-          for (c2 = 0; c2 < NOTCHAR; ++c2)
13d99c
-            if (IS_WORD_CONSTITUENT (c2))
13d99c
-              setbit (c2, ccl);
13d99c
-          if (c == 'W')
13d99c
-            notset (ccl);
13d99c
+
13d99c
+          if (!dfa->multibyte)
13d99c
+            {
13d99c
+              zeroset (ccl);
13d99c
+              for (c2 = 0; c2 < NOTCHAR; ++c2)
13d99c
+                if (IS_WORD_CONSTITUENT (c2))
13d99c
+                  setbit (c2, ccl);
13d99c
+              if (c == 'W')
13d99c
+                notset (ccl);
13d99c
+              laststart = false;
13d99c
+              return lasttok = CSET + charclass_index (ccl);
13d99c
+            }
13d99c
+
13d99c
+          /* FIXME: see if optimizing this, as is done with ANYCHAR and
13d99c
+             add_utf8_anychar, makes sense.  */
13d99c
+
13d99c
+          /* \w and \W are documented to be equivalent to [_[:alnum:]] and
13d99c
+             [^_[:alnum:]] respectively, so tell the lexer to process those
13d99c
+             strings, each minus its "already processed" '['.  */
13d99c
+          PUSH_LEX_STATE (c == 'w' ? "_[:alnum:]]" : "^_[:alnum:]]");
13d99c
+
13d99c
+          lasttok = parse_bracket_exp ();
13d99c
+
13d99c
+          POP_LEX_STATE ();
13d99c
+
13d99c
           laststart = false;
13d99c
-          return lasttok = CSET + charclass_index (ccl);
13d99c
+          return lasttok;
13d99c
 
13d99c
         case '[':
13d99c
           if (backslash)
13d99c
--- a/tests/Makefile.am
13d99c
+++ b/tests/Makefile.am
13d99c
@@ -110,6 +110,7 @@ TESTS =						\
13d99c
   warn-char-classes				\
13d99c
   word-delim-multibyte				\
13d99c
   word-multi-file				\
13d99c
+  word-multibyte				\
13d99c
   yesno
13d99c
 
13d99c
 EXTRA_DIST =					\
13d99c
--- a/tests/Makefile.in
13d99c
+++ b/tests/Makefile.in
13d99c
@@ -1409,6 +1409,7 @@ TESTS = \
13d99c
   warn-char-classes				\
13d99c
   word-delim-multibyte				\
13d99c
   word-multi-file				\
13d99c
+  word-multibyte				\
13d99c
   yesno
13d99c
 
13d99c
 EXTRA_DIST = \
13d99c
@@ -2286,6 +2287,13 @@ word-multi-file.log: word-multi-file
13d99c
 	--log-file $$b.log --trs-file $$b.trs \
13d99c
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
13d99c
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
13d99c
+word-multibyte.log: word-multibyte
13d99c
+	@p='word-multibyte'; \
13d99c
+	b='word-multibyte'; \
13d99c
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
13d99c
+	--log-file $$b.log --trs-file $$b.trs \
13d99c
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
13d99c
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
13d99c
 yesno.log: yesno
13d99c
 	@p='yesno'; \
13d99c
 	b='yesno'; \
13d99c
--- /dev/null
13d99c
+++ b/tests/word-multibyte
13d99c
@@ -0,0 +1,23 @@ 
13d99c
+#!/bin/sh
13d99c
+# Check that \w and \W handle a multibyte letter; this would fail for grep-2.20
13d99c
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
13d99c
+
13d99c
+require_en_utf8_locale_
13d99c
+
13d99c
+printf '\xc3\xa1\n' > in || framework_failure_
13d99c
+LC_ALL=en_US.UTF-8
13d99c
+export LC_ALL
13d99c
+
13d99c
+fail=0
13d99c
+
13d99c
+for LOC in en_US.UTF-8 zh_CN $LOCALE_FR_UTF8; do
13d99c
+  out=out1-$LOC
13d99c
+  LC_ALL=$LOC grep '\w' in >$out || fail=1
13d99c
+  compare in $out || fail=1
13d99c
+
13d99c
+  out=out2-$LOC
13d99c
+  LC_ALL=$LOC grep '\W' in >$out && fail=1
13d99c
+  compare /dev/null $out || fail=1
13d99c
+done
13d99c
+
13d99c
+Exit $fail