Blame SOURCES/gcc11-Wbidi-chars.patch

e60d6e
commit 51c500269bf53749b107807d84271385fad35628
e60d6e
Author: Marek Polacek <polacek@redhat.com>
e60d6e
Date:   Wed Oct 6 14:33:59 2021 -0400
e60d6e
e60d6e
    libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026]
e60d6e
    
e60d6e
    From a link below:
e60d6e
    "An issue was discovered in the Bidirectional Algorithm in the Unicode
e60d6e
    Specification through 14.0. It permits the visual reordering of
e60d6e
    characters via control sequences, which can be used to craft source code
e60d6e
    that renders different logic than the logical ordering of tokens
e60d6e
    ingested by compilers and interpreters. Adversaries can leverage this to
e60d6e
    encode source code for compilers accepting Unicode such that targeted
e60d6e
    vulnerabilities are introduced invisibly to human reviewers."
e60d6e
    
e60d6e
    More info:
e60d6e
    https://nvd.nist.gov/vuln/detail/CVE-2021-42574
e60d6e
    https://trojansource.codes/
e60d6e
    
e60d6e
    This is not a compiler bug.  However, to mitigate the problem, this patch
e60d6e
    implements -Wbidi-chars=[none|unpaired|any] to warn about possibly
e60d6e
    misleading Unicode bidirectional control characters the preprocessor may
e60d6e
    encounter.
e60d6e
    
e60d6e
    The default is =unpaired, which warns about improperly terminated
e60d6e
    bidirectional control characters; e.g. a LRE without its corresponding PDF.
e60d6e
    The level =any warns about any use of bidirectional control characters.
e60d6e
    
e60d6e
    This patch handles both UCNs and UTF-8 characters.  UCNs designating
e60d6e
    bidi characters in identifiers are accepted since r204886.  Then r217144
e60d6e
    enabled -fextended-identifiers by default.  Extended characters in C/C++
e60d6e
    identifiers have been accepted since r275979.  However, this patch still
e60d6e
    warns about mixing UTF-8 and UCN bidi characters; there seems to be no
e60d6e
    good reason to allow mixing them.
e60d6e
    
e60d6e
    We warn in different contexts: comments (both C and C++-style), string
e60d6e
    literals, character constants, and identifiers.  Expectedly, UCNs are ignored
e60d6e
    in comments and raw string literals.  The bidirectional control characters
e60d6e
    can nest so this patch handles that as well.
e60d6e
    
e60d6e
    I have neither included nor tested this with Fortran (which also has
e60d6e
    string literals and line comments).
e60d6e
    
e60d6e
    Dave M. posted patches improving diagnostics involving Unicode characters.
e60d6e
    This patch does not make use of this new infrastructure yet.
e60d6e
    
e60d6e
            PR preprocessor/103026
e60d6e
    
e60d6e
    gcc/c-family/ChangeLog:
e60d6e
    
e60d6e
            * c.opt (Wbidi-chars, Wbidi-chars=): New option.
e60d6e
    
e60d6e
    gcc/ChangeLog:
e60d6e
    
e60d6e
            * doc/invoke.texi: Document -Wbidi-chars.
e60d6e
    
e60d6e
    libcpp/ChangeLog:
e60d6e
    
e60d6e
            * include/cpplib.h (enum cpp_bidirectional_level): New.
e60d6e
            (struct cpp_options): Add cpp_warn_bidirectional.
e60d6e
            (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL.
e60d6e
            * internal.h (struct cpp_reader): Add warn_bidi_p member
e60d6e
            function.
e60d6e
            * init.c (cpp_create_reader): Set cpp_warn_bidirectional.
e60d6e
            * lex.c (bidi): New namespace.
e60d6e
            (get_bidi_utf8): New function.
e60d6e
            (get_bidi_ucn): Likewise.
e60d6e
            (maybe_warn_bidi_on_close): Likewise.
e60d6e
            (maybe_warn_bidi_on_char): Likewise.
e60d6e
            (_cpp_skip_block_comment): Implement warning about bidirectional
e60d6e
            control characters.
e60d6e
            (skip_line_comment): Likewise.
e60d6e
            (forms_identifier_p): Likewise.
e60d6e
            (lex_identifier): Likewise.
e60d6e
            (lex_string): Likewise.
e60d6e
            (lex_raw_string): Likewise.
e60d6e
    
e60d6e
    gcc/testsuite/ChangeLog:
e60d6e
    
e60d6e
            * c-c++-common/Wbidi-chars-1.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-2.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-3.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-4.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-5.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-6.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-7.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-8.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-9.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-10.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-11.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-12.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-13.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-14.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-15.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-16.c: New test.
e60d6e
            * c-c++-common/Wbidi-chars-17.c: New test.
e60d6e
e60d6e
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
e60d6e
index 8a4cd634f77..3976fc368db 100644
e60d6e
--- a/gcc/c-family/c.opt
e60d6e
+++ b/gcc/c-family/c.opt
e60d6e
@@ -374,6 +374,30 @@ Wbad-function-cast
e60d6e
 C ObjC Var(warn_bad_function_cast) Warning
e60d6e
 Warn about casting functions to incompatible types.
e60d6e
 
e60d6e
+Wbidi-chars
e60d6e
+C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none)
e60d6e
+;
e60d6e
+
e60d6e
+Wbidi-chars=
e60d6e
+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level)
e60d6e
+-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters.
e60d6e
+
e60d6e
+; Required for these enum values.
e60d6e
+SourceInclude
e60d6e
+cpplib.h
e60d6e
+
e60d6e
+Enum
e60d6e
+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized)
e60d6e
+
e60d6e
+EnumValue
e60d6e
+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none)
e60d6e
+
e60d6e
+EnumValue
e60d6e
+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired)
e60d6e
+
e60d6e
+EnumValue
e60d6e
+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any)
e60d6e
+
e60d6e
 Wbool-compare
e60d6e
 C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
e60d6e
 Warn about boolean expression compared with an integer value different from true/false.
e60d6e
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
e60d6e
index 6070288856c..a22758d18ee 100644
e60d6e
--- a/gcc/doc/invoke.texi
e60d6e
+++ b/gcc/doc/invoke.texi
e60d6e
@@ -325,7 +325,9 @@ Objective-C and Objective-C++ Dialects}.
e60d6e
 -Warith-conversion @gol
e60d6e
 -Warray-bounds  -Warray-bounds=@var{n} @gol
e60d6e
 -Wno-attributes  -Wattribute-alias=@var{n} -Wno-attribute-alias @gol
e60d6e
--Wno-attribute-warning  -Wbool-compare  -Wbool-operation @gol
e60d6e
+-Wno-attribute-warning  @gol
e60d6e
+-Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
e60d6e
+-Wbool-compare  -Wbool-operation @gol
e60d6e
 -Wno-builtin-declaration-mismatch @gol
e60d6e
 -Wno-builtin-macro-redefined  -Wc90-c99-compat  -Wc99-c11-compat @gol
e60d6e
 -Wc11-c2x-compat @gol
e60d6e
@@ -7557,6 +7559,23 @@ Attributes considered include @code{alloc_align}, @code{alloc_size},
e60d6e
 This is the default.  You can disable these warnings with either
e60d6e
 @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}.
e60d6e
 
e60d6e
+@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]}
e60d6e
+@opindex Wbidi-chars=
e60d6e
+@opindex Wbidi-chars
e60d6e
+@opindex Wno-bidi-chars
e60d6e
+Warn about possibly misleading UTF-8 bidirectional control characters in
e60d6e
+comments, string literals, character constants, and identifiers.  Such
e60d6e
+characters can change left-to-right writing direction into right-to-left
e60d6e
+(and vice versa), which can cause confusion between the logical order and
e60d6e
+visual order.  This may be dangerous; for instance, it may seem that a piece
e60d6e
+of code is not commented out, whereas it in fact is.
e60d6e
+
e60d6e
+There are three levels of warning supported by GCC@.  The default is
e60d6e
+@option{-Wbidi-chars=unpaired}, which warns about improperly terminated
e60d6e
+bidi contexts.  @option{-Wbidi-chars=none} turns the warning off.
e60d6e
+@option{-Wbidi-chars=any} warns about any use of bidirectional control
e60d6e
+characters.
e60d6e
+
e60d6e
 @item -Wbool-compare
e60d6e
 @opindex Wno-bool-compare
e60d6e
 @opindex Wbool-compare
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..34f5ac19271
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
e60d6e
@@ -0,0 +1,12 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+
e60d6e
+int main() {
e60d6e
+    int isAdmin = 0;
e60d6e
+    /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
e60d6e
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
e60d6e
+        __builtin_printf("You are an admin.\n");
e60d6e
+    /* end admins only ‮ { ⁦*/
e60d6e
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
e60d6e
+    return 0;
e60d6e
+}
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..3f851b69e65
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
e60d6e
@@ -0,0 +1,27 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=unpaired" } */
e60d6e
+/* More nesting testing.  */
e60d6e
+
e60d6e
+/* RLE‫ LRI⁦ PDF‬ PDI⁩*/
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int LRE_\u202a_PDF_\u202c;
e60d6e
+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c;
e60d6e
+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c;
e60d6e
+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c;
e60d6e
+int FSI_\u2068;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int FSI_\u2068_PDI_\u2069;
e60d6e
+int FSI_\u2068_FSI_\u2068_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
e60d6e
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..270ce2368a9
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
e60d6e
@@ -0,0 +1,13 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=unpaired" } */
e60d6e
+/* Test that we warn when mixing UCN and UTF-8.  */
e60d6e
+
e60d6e
+int LRE_‪_PDF_\u202c;
e60d6e
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
e60d6e
+int LRE_\u202a_PDF_‬_;
e60d6e
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
e60d6e
+const char *s1 = "LRE_‪_PDF_\u202c";
e60d6e
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
e60d6e
+const char *s2 = "LRE_\u202a_PDF_‬";
e60d6e
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..b07eec1da91
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
e60d6e
@@ -0,0 +1,19 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile { target { c || c++11 } } } */
e60d6e
+/* { dg-options "-Wbidi-chars=any" } */
e60d6e
+/* Test raw strings.  */
e60d6e
+
e60d6e
+const char *s1 = R"(a b c LRE‪ 1 2 3 PDF‬ x y z)";
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+const char *s2 = R"(a b c RLE‫ 1 2 3 PDF‬ x y z)";
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+const char *s3 = R"(a b c LRO‭ 1 2 3 PDF‬ x y z)";
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+const char *s4 = R"(a b c RLO‮ 1 2 3 PDF‬ x y z)";
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+const char *s7 = R"(a b c FSI⁨ 1 2 3 PDI⁩ x y) z";
e60d6e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
e60d6e
+const char *s8 = R"(a b c PDI⁩ x y )z";
e60d6e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
e60d6e
+const char *s9 = R"(a b c PDF‬ x y z)";
e60d6e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..b2dd9fde752
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
e60d6e
@@ -0,0 +1,17 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile { target { c || c++11 } } } */
e60d6e
+/* { dg-options "-Wbidi-chars=unpaired" } */
e60d6e
+/* Test raw strings.  */
e60d6e
+
e60d6e
+const char *s1 = R"(a b c LRE‪ 1 2 3)";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s2 = R"(a b c RLE‫ 1 2 3)";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s3 = R"(a b c LRO‭ 1 2 3)";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s4 = R"(a b c FSI⁨ 1 2 3)";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s5 = R"(a b c LRI⁦ 1 2 3)";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s6 = R"(a b c RLI⁧ 1 2 3)";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..ba5f75d9553
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
e60d6e
@@ -0,0 +1,38 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=unpaired" } */
e60d6e
+/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs,
e60d6e
+   or RLOs.  */
e60d6e
+
e60d6e
+/* LRI_⁦_LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩*/
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩
e60d6e
+// LRI_⁦_RLO_‮_RLE_‫_RLE_‫_PDI_⁩
e60d6e
+// LRI_⁦_RLO_‮_RLE_‫_PDI_⁩
e60d6e
+// FSI_⁨_RLO_‮_PDI_⁩
e60d6e
+// FSI_⁨_FSI_⁨_RLO_‮_PDI_⁩
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
e60d6e
+int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int PDI_\u2069;
e60d6e
+int LRI_\u2066_PDI_\u2069;
e60d6e
+int RLI_\u2067_PDI_\u2069;
e60d6e
+int LRE_\u202a_LRI_\u2066_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069;
e60d6e
+int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
e60d6e
+int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int RLO_\u202e_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int RLI_\u2067_PDI_\u2069_RLI_\u2067;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int FSI_\u2068_PDF_\u202c_PDI_\u2069;
e60d6e
+int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..a0ce8ff5e2c
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
e60d6e
@@ -0,0 +1,59 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=unpaired" } */
e60d6e
+/* Test unpaired bidi control chars in multiline comments.  */
e60d6e
+
e60d6e
+/*
e60d6e
+ * LRE‪ end
e60d6e
+ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/*
e60d6e
+ * RLE‫ end
e60d6e
+ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/*
e60d6e
+ * LRO‭ end
e60d6e
+ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/*
e60d6e
+ * RLO‮ end
e60d6e
+ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/*
e60d6e
+ * LRI⁦ end
e60d6e
+ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/*
e60d6e
+ * RLI⁧ end
e60d6e
+ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/*
e60d6e
+ * FSI⁨ end
e60d6e
+ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/* LRE‪
e60d6e
+   PDF‬ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/* FSI⁨
e60d6e
+   PDI⁩ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+
e60d6e
+/* LRE<‪>
e60d6e
+ *
e60d6e
+ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */
e60d6e
+
e60d6e
+/*
e60d6e
+ * LRE<‪>
e60d6e
+ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+
e60d6e
+/*
e60d6e
+ *
e60d6e
+ * LRE<‪> */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+/* RLI<⁧> */ /* PDI<⁩> */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* LRE<‪> */ /* PDF<‬> */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..baa0159861c
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
e60d6e
@@ -0,0 +1,26 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=any" } */
e60d6e
+/* Test LTR/RTL chars.  */
e60d6e
+
e60d6e
+/* LTR<‎> */
e60d6e
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
e60d6e
+// LTR<‎>
e60d6e
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
e60d6e
+/* RTL<‏> */
e60d6e
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
e60d6e
+// RTL<‏>
e60d6e
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+const char *s1 = "LTR<‎>";
e60d6e
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
e60d6e
+const char *s2 = "LTR\u200e";
e60d6e
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
e60d6e
+const char *s3 = "LTR\u200E";
e60d6e
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
e60d6e
+const char *s4 = "RTL<‏>";
e60d6e
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
e60d6e
+const char *s5 = "RTL\u200f";
e60d6e
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
e60d6e
+const char *s6 = "RTL\u200F";
e60d6e
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..07cb4321f96
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
e60d6e
@@ -0,0 +1,30 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=unpaired" } */
e60d6e
+/* Test LTR/RTL chars.  */
e60d6e
+
e60d6e
+/* LTR<‎> */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// LTR<‎>
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* RTL<‏> */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// RTL<‏>
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int ltr_\u200e;
e60d6e
+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
e60d6e
+int rtl_\u200f;
e60d6e
+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+const char *s1 = "LTR<‎>";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s2 = "LTR\u200e";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s3 = "LTR\u200E";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s4 = "RTL<‏>";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s5 = "RTL\u200f";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s6 = "RTL\u200F";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..2340374f276
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
e60d6e
@@ -0,0 +1,9 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+
e60d6e
+int main() {
e60d6e
+    /* Say hello; newline⁧/*/ return 0 ;
e60d6e
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
e60d6e
+    __builtin_printf("Hello world.\n");
e60d6e
+    return 0;
e60d6e
+}
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..9dc7edb6e64
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
e60d6e
@@ -0,0 +1,11 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+
e60d6e
+int main() {
e60d6e
+    const char* access_level = "user";
e60d6e
+    if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) {
e60d6e
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
e60d6e
+        __builtin_printf("You are an admin.\n");
e60d6e
+    }
e60d6e
+    return 0;
e60d6e
+}
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..639e5c62e88
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
e60d6e
@@ -0,0 +1,188 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */
e60d6e
+/* Test all bidi chars in various contexts (identifiers, comments,
e60d6e
+   string literals, character constants), both UCN and UTF-8.  The bidi
e60d6e
+   chars here are properly terminated, except for the character constants.  */
e60d6e
+
e60d6e
+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
e60d6e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
e60d6e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
e60d6e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+/* Same but C++ comments instead.  */
e60d6e
+// a b c LRE‪ 1 2 3 PDF‬ x y z
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+// a b c RLE‫ 1 2 3 PDF‬ x y z
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+// a b c LRO‭ 1 2 3 PDF‬ x y z
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+// a b c RLO‮ 1 2 3 PDF‬ x y z
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
e60d6e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
e60d6e
+// a b c RLI⁧ 1 2 3 PDI⁩ x y
e60d6e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
e60d6e
+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
e60d6e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+/* Here we're closing an unopened context, warn when =any.  */
e60d6e
+/* a b c PDI⁩ x y z */
e60d6e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c PDF‬ x y z */
e60d6e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
e60d6e
+// a b c PDI⁩ x y z
e60d6e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
e60d6e
+// a b c PDF‬ x y z
e60d6e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+/* Multiline comments.  */
e60d6e
+/* a b c PDI⁩ x y z
e60d6e
+   */
e60d6e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
e60d6e
+/* a b c PDF‬ x y z
e60d6e
+   */
e60d6e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
e60d6e
+/* first
e60d6e
+   a b c PDI⁩ x y z
e60d6e
+   */
e60d6e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
e60d6e
+/* first
e60d6e
+   a b c PDF‬ x y z
e60d6e
+   */
e60d6e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
e60d6e
+/* first
e60d6e
+   a b c PDI⁩ x y z */
e60d6e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
e60d6e
+/* first
e60d6e
+   a b c PDF‬ x y z */
e60d6e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+void
e60d6e
+g1 ()
e60d6e
+{
e60d6e
+  const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
e60d6e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
e60d6e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
e60d6e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s8 = "a b c PDI⁩ x y z";
e60d6e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s9 = "a b c PDF‬ x y z";
e60d6e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+  const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
e60d6e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
e60d6e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
e60d6e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
e60d6e
+}
e60d6e
+
e60d6e
+void
e60d6e
+g2 ()
e60d6e
+{
e60d6e
+  const char c1 = '\u202a';
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+  const char c2 = '\u202A';
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+  const char c3 = '\u202b';
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+  const char c4 = '\u202B';
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+  const char c5 = '\u202d';
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+  const char c6 = '\u202D';
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+  const char c7 = '\u202e';
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+  const char c8 = '\u202E';
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+  const char c9 = '\u2066';
e60d6e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
e60d6e
+  const char c10 = '\u2067';
e60d6e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
e60d6e
+  const char c11 = '\u2068';
e60d6e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
e60d6e
+}
e60d6e
+
e60d6e
+int a‪b‬c;
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+int a‫b‬c;
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+int a‭b‬c;
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+int a‮b‬c;
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+int a⁦b⁩c;
e60d6e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
e60d6e
+int a⁧b⁩c;
e60d6e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
e60d6e
+int a⁨b⁩c;
e60d6e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
e60d6e
+int A‬X;
e60d6e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
e60d6e
+int A\u202cY;
e60d6e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
e60d6e
+int A\u202CY2;
e60d6e
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+int d\u202ae\u202cf;
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202Ae\u202cf2;
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202be\u202cf;
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202Be\u202cf2;
e60d6e
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202de\u202cf;
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202De\u202cf2;
e60d6e
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202ee\u202cf;
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202Ee\u202cf2;
e60d6e
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
e60d6e
+int d\u2066e\u2069f;
e60d6e
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
e60d6e
+int d\u2067e\u2069f;
e60d6e
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
e60d6e
+int d\u2068e\u2069f;
e60d6e
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
e60d6e
+int X\u2069;
e60d6e
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..68cb053144b
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
e60d6e
@@ -0,0 +1,188 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */
e60d6e
+/* Test all bidi chars in various contexts (identifiers, comments,
e60d6e
+   string literals, character constants), both UCN and UTF-8.  The bidi
e60d6e
+   chars here are properly terminated, except for the character constants.  */
e60d6e
+
e60d6e
+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+/* Same but C++ comments instead.  */
e60d6e
+// a b c LRE‪ 1 2 3 PDF‬ x y z
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c RLE‫ 1 2 3 PDF‬ x y z
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c LRO‭ 1 2 3 PDF‬ x y z
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c RLO‮ 1 2 3 PDF‬ x y z
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c RLI⁧ 1 2 3 PDI⁩ x y
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+/* Here we're closing an unopened context, warn when =any.  */
e60d6e
+/* a b c PDI⁩ x y z */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c PDF‬ x y z */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c PDI⁩ x y z
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c PDF‬ x y z
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+/* Multiline comments.  */
e60d6e
+/* a b c PDI⁩ x y z
e60d6e
+   */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/* a b c PDF‬ x y z
e60d6e
+   */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/* first
e60d6e
+   a b c PDI⁩ x y z
e60d6e
+   */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/* first
e60d6e
+   a b c PDF‬ x y z
e60d6e
+   */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+/* first
e60d6e
+   a b c PDI⁩ x y z */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* first
e60d6e
+   a b c PDF‬ x y z */
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+void
e60d6e
+g1 ()
e60d6e
+{
e60d6e
+  const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s8 = "a b c PDI⁩ x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s9 = "a b c PDF‬ x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+  const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+}
e60d6e
+
e60d6e
+void
e60d6e
+g2 ()
e60d6e
+{
e60d6e
+  const char c1 = '\u202a';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char c2 = '\u202A';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char c3 = '\u202b';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char c4 = '\u202B';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char c5 = '\u202d';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char c6 = '\u202D';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char c7 = '\u202e';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char c8 = '\u202E';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char c9 = '\u2066';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char c10 = '\u2067';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char c11 = '\u2068';
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+}
e60d6e
+
e60d6e
+int a‪b‬c;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a‫b‬c;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a‭b‬c;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a‮b‬c;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a⁦b⁩c;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a⁧b⁩c;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a⁨b⁩c;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int A‬X;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int A\u202cY;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int A\u202CY2;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+int d\u202ae\u202cf;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202Ae\u202cf2;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202be\u202cf;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202Be\u202cf2;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202de\u202cf;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202De\u202cf2;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202ee\u202cf;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int d\u202Ee\u202cf2;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int d\u2066e\u2069f;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int d\u2067e\u2069f;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int d\u2068e\u2069f;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int X\u2069;
e60d6e
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..0ce6fff2dee
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
e60d6e
@@ -0,0 +1,155 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=unpaired" } */
e60d6e
+/* Test nesting of bidi chars in various contexts.  */
e60d6e
+
e60d6e
+/* Terminated by the wrong char:  */
e60d6e
+/* a b c LRE‪ 1 2 3 PDI⁩ x y z */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c RLE‫ 1 2 3 PDI⁩ x y  z*/
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c LRO‭ 1 2 3 PDI⁩ x y z */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c RLO‮ 1 2 3 PDI⁩ x y z */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c LRI⁦ 1 2 3 PDF‬ x y z */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c RLI⁧ 1 2 3 PDF‬ x y z */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* a b c FSI⁨ 1 2 3 PDF‬ x y  z*/
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+/* LRE‪ PDF‬ */
e60d6e
+/* LRE‪ LRE‪ PDF‬ PDF‬ */
e60d6e
+/* PDF‬ LRE‪ PDF‬ */
e60d6e
+/* LRE‪ PDF‬ LRE‪ PDF‬ */
e60d6e
+/* LRE‪ LRE‪ PDF‬ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* PDF‬ LRE‪ */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+// a b c LRE‪ 1 2 3 PDI⁩ x y z
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c RLE‫ 1 2 3 PDI⁩ x y  z*/
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c LRO‭ 1 2 3 PDI⁩ x y z 
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c RLO‮ 1 2 3 PDI⁩ x y z 
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c LRI⁦ 1 2 3 PDF‬ x y z 
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c RLI⁧ 1 2 3 PDF‬ x y z 
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// a b c FSI⁨ 1 2 3 PDF‬ x y  z
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+// LRE‪ PDF‬ 
e60d6e
+// LRE‪ LRE‪ PDF‬ PDF‬
e60d6e
+// PDF‬ LRE‪ PDF‬
e60d6e
+// LRE‪ PDF‬ LRE‪ PDF‬
e60d6e
+// LRE‪ LRE‪ PDF‬
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+// PDF‬ LRE‪
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+void
e60d6e
+g1 ()
e60d6e
+{
e60d6e
+  const char *s1 = "a b c LRE‪ 1 2 3 PDI⁩ x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s3 = "a b c RLE‫ 1 2 3 PDI⁩ x y ";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s5 = "a b c LRO‭ 1 2 3 PDI⁩ x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s7 = "a b c RLO‮ 1 2 3 PDI⁩ x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s9 = "a b c LRI⁦ 1 2 3 PDF‬ x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s11 = "a b c RLI⁧ 1 2 3 PDF‬ x y z\
e60d6e
+    ";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+  const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s13 = "a b c FSI⁨ 1 2 3 PDF‬ x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s15 = "PDF‬ LRE‪";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s16 = "PDF\u202c LRE\u202a";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s17 = "LRE‪ PDF‬";
e60d6e
+  const char *s18 = "LRE\u202a PDF\u202c";
e60d6e
+  const char *s19 = "LRE‪ LRE‪ PDF‬ PDF‬";
e60d6e
+  const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c";
e60d6e
+  const char *s21 = "PDF‬ LRE‪ PDF‬";
e60d6e
+  const char *s22 = "PDF\u202c LRE\u202a PDF\u202c";
e60d6e
+  const char *s23 = "LRE‪ LRE‪ PDF‬";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s24 = "LRE\u202a LRE\u202a PDF\u202c";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s25 = "PDF‬ LRE‪";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s26 = "PDF\u202c LRE\u202a";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s27 = "PDF‬ LRE\u202a";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+  const char *s28 = "PDF\u202c LRE‪";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+}
e60d6e
+
e60d6e
+int aLRE‪bPDI⁩;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int A\u202aB\u2069C;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int aRLE‫bPDI⁩;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a\u202bB\u2069c;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int aLRO‭bPDI⁩;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a\u202db\u2069c2;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int aRLO‮bPDI⁩;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a\u202eb\u2069;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int aLRI⁦bPDF‬;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a\u2066b\u202c;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int aRLI⁧bPDF‬c
e60d6e
+;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
e60d6e
+int a\u2067b\u202c;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int aFSI⁨bPDF‬;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a\u2068b\u202c;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int aFSI⁨bPD\u202C;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int aFSI\u2068bPDF‬_;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int aLRE‪bPDF‬b; 
e60d6e
+int A\u202aB\u202c;
e60d6e
+int a_LRE‪_LRE‪_b_PDF‬_PDF‬;
e60d6e
+int A\u202aA\u202aB\u202cB\u202c;
e60d6e
+int aPDF‬bLREadPDF‬;
e60d6e
+int a_\u202C_\u202a_\u202c;
e60d6e
+int a_LRE‪_b_PDF‬_c_LRE‪_PDF‬;
e60d6e
+int a_\u202a_\u202c_\u202a_\u202c_;
e60d6e
+int a_LRE‪_b_PDF‬_c_LRE‪;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int a_\u202a_\u202c_\u202a_;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..d012d420ec0
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
e60d6e
@@ -0,0 +1,9 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=any" } */
e60d6e
+/* Test we ignore UCNs in comments.  */
e60d6e
+
e60d6e
+// a b c \u202a 1 2 3
e60d6e
+// a b c \u202A 1 2 3
e60d6e
+/* a b c \u202a 1 2 3 */
e60d6e
+/* a b c \u202A 1 2 3 */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..4f54c5092ec
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
e60d6e
@@ -0,0 +1,13 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=any" } */
e60d6e
+/* Test \u vs \U.  */
e60d6e
+
e60d6e
+int a_\u202A;
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+int a_\u202a_2;
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+int a_\U0000202A_3;
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
+int a_\U0000202a_4;
e60d6e
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
e60d6e
new file mode 100644
e60d6e
index 00000000000..e2af1b1ca97
e60d6e
--- /dev/null
e60d6e
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
e60d6e
@@ -0,0 +1,29 @@
e60d6e
+/* PR preprocessor/103026 */
e60d6e
+/* { dg-do compile } */
e60d6e
+/* { dg-options "-Wbidi-chars=unpaired" } */
e60d6e
+/* Test that we properly separate bidi contexts (comment/identifier/character
e60d6e
+   constant/string literal).  */
e60d6e
+
e60d6e
+/* LRE ->‪<- */ int pdf_\u202c_1;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* RLE ->‫<- */ int pdf_\u202c_2;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* LRO ->‭<- */ int pdf_\u202c_3;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* RLO ->‮<- */ int pdf_\u202c_4;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* LRI ->⁦<-*/ int pdi_\u2069_1;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* RLI ->⁧<- */ int pdi_\u2069_12;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* FSI ->⁨<- */ int pdi_\u2069_3;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+
e60d6e
+const char *s1 = "LRE\u202a"; /* PDF ->‬<- */
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+/* LRE ->‪<- */ const char *s2 = "PDF\u202c";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+const char *s3 = "LRE\u202a"; int pdf_\u202c_5;
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
+int lre_\u202a; const char *s4 = "PDF\u202c";
e60d6e
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
e60d6e
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
e60d6e
index 176f8c5bbce..112b9c24751 100644
e60d6e
--- a/libcpp/include/cpplib.h
e60d6e
+++ b/libcpp/include/cpplib.h
e60d6e
@@ -319,6 +319,17 @@ enum cpp_main_search
e60d6e
   CMS_system,  /* Search the system INCLUDE path.  */
e60d6e
 };
e60d6e
 
e60d6e
+/* The possible levels of bidirectional control character checking, from
e60d6e
+   least restrictive to most.  */
e60d6e
+enum cpp_bidirectional_level {
e60d6e
+  /* No checking.  */
e60d6e
+  bidirectional_none,
e60d6e
+  /* Only detect unpaired uses of bidirectional control characters.  */
e60d6e
+  bidirectional_unpaired,
e60d6e
+  /* Detect any use of bidirectional control characters.  */
e60d6e
+  bidirectional_any
e60d6e
+};
e60d6e
+
e60d6e
 /* This structure is nested inside struct cpp_reader, and
e60d6e
    carries all the options visible to the command line.  */
e60d6e
 struct cpp_options
e60d6e
@@ -539,6 +550,10 @@ struct cpp_options
e60d6e
   /* True if warn about differences between C++98 and C++11.  */
e60d6e
   bool cpp_warn_cxx11_compat;
e60d6e
 
e60d6e
+  /* Nonzero if bidirectional control characters checking is on.  See enum
e60d6e
+     cpp_bidirectional_level.  */
e60d6e
+  unsigned char cpp_warn_bidirectional;
e60d6e
+
e60d6e
   /* Dependency generation.  */
e60d6e
   struct
e60d6e
   {
e60d6e
@@ -643,7 +658,8 @@ enum cpp_warning_reason {
e60d6e
   CPP_W_C90_C99_COMPAT,
e60d6e
   CPP_W_C11_C2X_COMPAT,
e60d6e
   CPP_W_CXX11_COMPAT,
e60d6e
-  CPP_W_EXPANSION_TO_DEFINED
e60d6e
+  CPP_W_EXPANSION_TO_DEFINED,
e60d6e
+  CPP_W_BIDIRECTIONAL
e60d6e
 };
e60d6e
 
e60d6e
 /* Callback for header lookup for HEADER, which is the name of a
e60d6e
diff --git a/libcpp/init.c b/libcpp/init.c
e60d6e
index 5a424e23553..f9a8f5f088f 100644
e60d6e
--- a/libcpp/init.c
e60d6e
+++ b/libcpp/init.c
e60d6e
@@ -223,6 +223,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
e60d6e
       = ENABLE_CANONICAL_SYSTEM_HEADERS;
e60d6e
   CPP_OPTION (pfile, ext_numeric_literals) = 1;
e60d6e
   CPP_OPTION (pfile, warn_date_time) = 0;
e60d6e
+  CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
e60d6e
 
e60d6e
   /* Default CPP arithmetic to something sensible for the host for the
e60d6e
      benefit of dumb users like fix-header.  */
e60d6e
diff --git a/libcpp/internal.h b/libcpp/internal.h
e60d6e
index 8577cab6c83..0ce0246c5a2 100644
e60d6e
--- a/libcpp/internal.h
e60d6e
+++ b/libcpp/internal.h
e60d6e
@@ -597,6 +597,13 @@ struct cpp_reader
e60d6e
   /* Location identifying the main source file -- intended to be line
e60d6e
      zero of said file.  */
e60d6e
   location_t main_loc;
e60d6e
+
e60d6e
+  /* Returns true iff bidirectional control character checking is
e60d6e
+     enabled, i.e. the level is not bidirectional_none.  */
e60d6e
+  bool warn_bidi_p () const
e60d6e
+  {
e60d6e
+    return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none;
e60d6e
+  }
e60d6e
 };
e60d6e
 
e60d6e
 /* Character classes.  Based on the more primitive macros in safe-ctype.h.
e60d6e
diff --git a/libcpp/lex.c b/libcpp/lex.c
e60d6e
index fa2253d41c3..6a4fbce6030 100644
e60d6e
--- a/libcpp/lex.c
e60d6e
+++ b/libcpp/lex.c
e60d6e
@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
e60d6e
     }
e60d6e
 }
e60d6e
 
e60d6e
+namespace bidi {
e60d6e
+  enum class kind {
e60d6e
+    NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
e60d6e
+  };
e60d6e
+
e60d6e
+  /* All the UTF-8 encodings of bidi characters start with E2.  */
e60d6e
+  constexpr uchar utf8_start = 0xe2;
e60d6e
+
e60d6e
+  /* A vector holding currently open bidi contexts.  We use a char for
e60d6e
+     each context, its LSB is 1 if it represents a PDF context, 0 if it
e60d6e
+     represents a PDI context.  The next bit is 1 if this context was opened
e60d6e
+     by a bidi character written as a UCN, and 0 when it was UTF-8.  */
e60d6e
+  semi_embedded_vec <unsigned char, 16> vec;
e60d6e
+
e60d6e
+  /* Close the whole comment/identifier/string literal/character constant
e60d6e
+     context.  */
e60d6e
+  void on_close ()
e60d6e
+  {
e60d6e
+    vec.truncate (0);
e60d6e
+  }
e60d6e
+
e60d6e
+  /* Pop the last element in the vector.  */
e60d6e
+  void pop ()
e60d6e
+  {
e60d6e
+    unsigned int len = vec.count ();
e60d6e
+    gcc_checking_assert (len > 0);
e60d6e
+    vec.truncate (len - 1);
e60d6e
+  }
e60d6e
+
e60d6e
+  /* Return the context of the Ith element.  */
e60d6e
+  kind ctx_at (unsigned int i)
e60d6e
+  {
e60d6e
+    return (vec[i] & 1) ? kind::PDF : kind::PDI;
e60d6e
+  }
e60d6e
+
e60d6e
+  /* Return which context is currently opened.  */
e60d6e
+  kind current_ctx ()
e60d6e
+  {
e60d6e
+    unsigned int len = vec.count ();
e60d6e
+    if (len == 0)
e60d6e
+      return kind::NONE;
e60d6e
+    return ctx_at (len - 1);
e60d6e
+  }
e60d6e
+
e60d6e
+  /* Return true if the current context comes from a UCN origin, that is,
e60d6e
+     the bidi char which started this bidi context was written as a UCN.  */
e60d6e
+  bool current_ctx_ucn_p ()
e60d6e
+  {
e60d6e
+    unsigned int len = vec.count ();
e60d6e
+    gcc_checking_assert (len > 0);
e60d6e
+    return (vec[len - 1] >> 1) & 1;
e60d6e
+  }
e60d6e
+
e60d6e
+  /* We've read a bidi char, update the current vector as necessary.  */
e60d6e
+  void on_char (kind k, bool ucn_p)
e60d6e
+  {
e60d6e
+    switch (k)
e60d6e
+      {
e60d6e
+      case kind::LRE:
e60d6e
+      case kind::RLE:
e60d6e
+      case kind::LRO:
e60d6e
+      case kind::RLO:
e60d6e
+	vec.push (ucn_p ? 3u : 1u);
e60d6e
+	break;
e60d6e
+      case kind::LRI:
e60d6e
+      case kind::RLI:
e60d6e
+      case kind::FSI:
e60d6e
+	vec.push (ucn_p ? 2u : 0u);
e60d6e
+	break;
e60d6e
+      /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
e60d6e
+	 whose scope has not yet been terminated.  */
e60d6e
+      case kind::PDF:
e60d6e
+	if (current_ctx () == kind::PDF)
e60d6e
+	  pop ();
e60d6e
+	break;
e60d6e
+      /* PDI terminates the scope of the last LRI, RLI, or FSI whose
e60d6e
+	 scope has not yet been terminated, as well as the scopes of
e60d6e
+	 any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
e60d6e
+	 yet been terminated.  */
e60d6e
+      case kind::PDI:
e60d6e
+	for (int i = vec.count () - 1; i >= 0; --i)
e60d6e
+	  if (ctx_at (i) == kind::PDI)
e60d6e
+	    {
e60d6e
+	      vec.truncate (i);
e60d6e
+	      break;
e60d6e
+	    }
e60d6e
+	break;
e60d6e
+      case kind::LTR:
e60d6e
+      case kind::RTL:
e60d6e
+	/* These aren't popped by a PDF/PDI.  */
e60d6e
+	break;
e60d6e
+      [[likely]] case kind::NONE:
e60d6e
+	break;
e60d6e
+      default:
e60d6e
+	abort ();
e60d6e
+      }
e60d6e
+  }
e60d6e
+
e60d6e
+  /* Return a descriptive string for K.  */
e60d6e
+  const char *to_str (kind k)
e60d6e
+  {
e60d6e
+    switch (k)
e60d6e
+      {
e60d6e
+      case kind::LRE:
e60d6e
+	return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
e60d6e
+      case kind::RLE:
e60d6e
+	return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
e60d6e
+      case kind::LRO:
e60d6e
+	return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
e60d6e
+      case kind::RLO:
e60d6e
+	return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
e60d6e
+      case kind::LRI:
e60d6e
+	return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
e60d6e
+      case kind::RLI:
e60d6e
+	return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
e60d6e
+      case kind::FSI:
e60d6e
+	return "U+2068 (FIRST STRONG ISOLATE)";
e60d6e
+      case kind::PDF:
e60d6e
+	return "U+202C (POP DIRECTIONAL FORMATTING)";
e60d6e
+      case kind::PDI:
e60d6e
+	return "U+2069 (POP DIRECTIONAL ISOLATE)";
e60d6e
+      case kind::LTR:
e60d6e
+	return "U+200E (LEFT-TO-RIGHT MARK)";
e60d6e
+      case kind::RTL:
e60d6e
+	return "U+200F (RIGHT-TO-LEFT MARK)";
e60d6e
+      default:
e60d6e
+	abort ();
e60d6e
+      }
e60d6e
+  }
e60d6e
+}
e60d6e
+
e60d6e
+/* Parse a sequence of 3 bytes starting with P and return its bidi code.  */
e60d6e
+
e60d6e
+static bidi::kind
e60d6e
+get_bidi_utf8 (const unsigned char *const p)
e60d6e
+{
e60d6e
+  gcc_checking_assert (p[0] == bidi::utf8_start);
e60d6e
+
e60d6e
+  if (p[1] == 0x80)
e60d6e
+    switch (p[2])
e60d6e
+      {
e60d6e
+      case 0xaa:
e60d6e
+	return bidi::kind::LRE;
e60d6e
+      case 0xab:
e60d6e
+	return bidi::kind::RLE;
e60d6e
+      case 0xac:
e60d6e
+	return bidi::kind::PDF;
e60d6e
+      case 0xad:
e60d6e
+	return bidi::kind::LRO;
e60d6e
+      case 0xae:
e60d6e
+	return bidi::kind::RLO;
e60d6e
+      case 0x8e:
e60d6e
+	return bidi::kind::LTR;
e60d6e
+      case 0x8f:
e60d6e
+	return bidi::kind::RTL;
e60d6e
+      default:
e60d6e
+	break;
e60d6e
+      }
e60d6e
+  else if (p[1] == 0x81)
e60d6e
+    switch (p[2])
e60d6e
+      {
e60d6e
+      case 0xa6:
e60d6e
+	return bidi::kind::LRI;
e60d6e
+      case 0xa7:
e60d6e
+	return bidi::kind::RLI;
e60d6e
+      case 0xa8:
e60d6e
+	return bidi::kind::FSI;
e60d6e
+      case 0xa9:
e60d6e
+	return bidi::kind::PDI;
e60d6e
+      default:
e60d6e
+	break;
e60d6e
+      }
e60d6e
+
e60d6e
+  return bidi::kind::NONE;
e60d6e
+}
e60d6e
+
e60d6e
+/* Parse a UCN where P points just past \u or \U and return its bidi code.  */
e60d6e
+
e60d6e
+static bidi::kind
e60d6e
+get_bidi_ucn (const unsigned char *p, bool is_U)
e60d6e
+{
e60d6e
+  /* 6.4.3 Universal Character Names
e60d6e
+      \u hex-quad
e60d6e
+      \U hex-quad hex-quad
e60d6e
+     where \unnnn means \U0000nnnn.  */
e60d6e
+
e60d6e
+  if (is_U)
e60d6e
+    {
e60d6e
+      if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
e60d6e
+	return bidi::kind::NONE;
e60d6e
+      /* Skip 4B so we can treat \u and \U the same below.  */
e60d6e
+      p += 4;
e60d6e
+    }
e60d6e
+
e60d6e
+  /* All code points we are looking for start with 20xx.  */
e60d6e
+  if (p[0] != '2' || p[1] != '0')
e60d6e
+    return bidi::kind::NONE;
e60d6e
+  else if (p[2] == '2')
e60d6e
+    switch (p[3])
e60d6e
+      {
e60d6e
+      case 'a':
e60d6e
+      case 'A':
e60d6e
+	return bidi::kind::LRE;
e60d6e
+      case 'b':
e60d6e
+      case 'B':
e60d6e
+	return bidi::kind::RLE;
e60d6e
+      case 'c':
e60d6e
+      case 'C':
e60d6e
+	return bidi::kind::PDF;
e60d6e
+      case 'd':
e60d6e
+      case 'D':
e60d6e
+	return bidi::kind::LRO;
e60d6e
+      case 'e':
e60d6e
+      case 'E':
e60d6e
+	return bidi::kind::RLO;
e60d6e
+      default:
e60d6e
+	break;
e60d6e
+      }
e60d6e
+  else if (p[2] == '6')
e60d6e
+    switch (p[3])
e60d6e
+      {
e60d6e
+      case '6':
e60d6e
+	return bidi::kind::LRI;
e60d6e
+      case '7':
e60d6e
+	return bidi::kind::RLI;
e60d6e
+      case '8':
e60d6e
+	return bidi::kind::FSI;
e60d6e
+      case '9':
e60d6e
+	return bidi::kind::PDI;
e60d6e
+      default:
e60d6e
+	break;
e60d6e
+      }
e60d6e
+  else if (p[2] == '0')
e60d6e
+    switch (p[3])
e60d6e
+      {
e60d6e
+      case 'e':
e60d6e
+      case 'E':
e60d6e
+	return bidi::kind::LTR;
e60d6e
+      case 'f':
e60d6e
+      case 'F':
e60d6e
+	return bidi::kind::RTL;
e60d6e
+      default:
e60d6e
+	break;
e60d6e
+      }
e60d6e
+
e60d6e
+  return bidi::kind::NONE;
e60d6e
+}
e60d6e
+
e60d6e
+/* We're closing a bidi context, that is, we've encountered a newline,
e60d6e
+   are closing a C-style comment, or are at the end of a string literal,
e60d6e
+   character constant, or identifier.  Warn if this context was not
e60d6e
+   properly terminated by a PDI or PDF.  P points to the last character
e60d6e
+   in this context.  */
e60d6e
+
e60d6e
+static void
e60d6e
+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
e60d6e
+{
e60d6e
+  if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired
e60d6e
+      && bidi::vec.count () > 0)
e60d6e
+    {
e60d6e
+      const location_t loc
e60d6e
+	= linemap_position_for_column (pfile->line_table,
e60d6e
+				       CPP_BUF_COLUMN (pfile->buffer, p));
e60d6e
+      cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
e60d6e
+			     "unpaired UTF-8 bidirectional control character "
e60d6e
+			     "detected");
e60d6e
+    }
e60d6e
+  /* We're done with this context.  */
e60d6e
+  bidi::on_close ();
e60d6e
+}
e60d6e
+
e60d6e
+/* We're at the beginning or in the middle of an identifier/comment/string
e60d6e
+   literal/character constant.  Warn if we've encountered a bidi character.
e60d6e
+   KIND says which bidi character it was; P points to it in the character
e60d6e
+   stream.  UCN_P is true iff this bidi character was written as a UCN.  */
e60d6e
+
e60d6e
+static void
e60d6e
+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
e60d6e
+			 bool ucn_p)
e60d6e
+{
e60d6e
+  if (__builtin_expect (kind == bidi::kind::NONE, 1))
e60d6e
+    return;
e60d6e
+
e60d6e
+  const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
e60d6e
+
e60d6e
+  if (warn_bidi != bidirectional_none)
e60d6e
+    {
e60d6e
+      const location_t loc
e60d6e
+	= linemap_position_for_column (pfile->line_table,
e60d6e
+				       CPP_BUF_COLUMN (pfile->buffer, p));
e60d6e
+      /* It seems excessive to warn about a PDI/PDF that is closing
e60d6e
+	 an opened context because we've already warned about the
e60d6e
+	 opening character.  Except warn when we have a UCN x UTF-8
e60d6e
+	 mismatch.  */
e60d6e
+      if (kind == bidi::current_ctx ())
e60d6e
+	{
e60d6e
+	  if (warn_bidi == bidirectional_unpaired
e60d6e
+	      && bidi::current_ctx_ucn_p () != ucn_p)
e60d6e
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
e60d6e
+				   "UTF-8 vs UCN mismatch when closing "
e60d6e
+				   "a context by \"%s\"", bidi::to_str (kind));
e60d6e
+	}
e60d6e
+      else if (warn_bidi == bidirectional_any)
e60d6e
+	{
e60d6e
+	  if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
e60d6e
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
e60d6e
+				   "\"%s\" is closing an unopened context",
e60d6e
+				   bidi::to_str (kind));
e60d6e
+	  else
e60d6e
+	    cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
e60d6e
+				   "found problematic Unicode character \"%s\"",
e60d6e
+				   bidi::to_str (kind));
e60d6e
+	}
e60d6e
+    }
e60d6e
+  /* Record this character in the stack of open bidi contexts.  */
e60d6e
+  bidi::on_char (kind, ucn_p);
e60d6e
+}
e60d6e
+
e60d6e
 /* Skip a C-style block comment.  We find the end of the comment by
e60d6e
    seeing if an asterisk is before every '/' we encounter.  Returns
e60d6e
    nonzero if comment terminated by EOF, zero otherwise.
e60d6e
@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfile)
e60d6e
   cpp_buffer *buffer = pfile->buffer;
e60d6e
   const uchar *cur = buffer->cur;
e60d6e
   uchar c;
e60d6e
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
e60d6e
 
e60d6e
   cur++;
e60d6e
   if (*cur == '/')
e60d6e
@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfile)
e60d6e
       if (c == '/')
e60d6e
 	{
e60d6e
 	  if (cur[-2] == '*')
e60d6e
-	    break;
e60d6e
+	    {
e60d6e
+	      if (warn_bidi_p)
e60d6e
+		maybe_warn_bidi_on_close (pfile, cur);
e60d6e
+	      break;
e60d6e
+	    }
e60d6e
 
e60d6e
 	  /* Warn about potential nested comments, but not if the '/'
e60d6e
 	     comes immediately before the true comment delimiter.
e60d6e
@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfile)
e60d6e
 	{
e60d6e
 	  unsigned int cols;
e60d6e
 	  buffer->cur = cur - 1;
e60d6e
+	  if (warn_bidi_p)
e60d6e
+	    maybe_warn_bidi_on_close (pfile, cur);
e60d6e
 	  _cpp_process_line_notes (pfile, true);
e60d6e
 	  if (buffer->next_line >= buffer->rlimit)
e60d6e
 	    return true;
e60d6e
@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfile)
e60d6e
 
e60d6e
 	  cur = buffer->cur;
e60d6e
 	}
e60d6e
+      /* If this byte can begin the UTF-8 encoding of a bidirectional
e60d6e
+	 control character, check whether it actually does.  */
e60d6e
+      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
e60d6e
+	{
e60d6e
+	  bidi::kind kind = get_bidi_utf8 (cur - 1);
e60d6e
+	  maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
e60d6e
+	}
e60d6e
     }
e60d6e
 
e60d6e
   buffer->cur = cur;
e60d6e
@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile)
e60d6e
 {
e60d6e
   cpp_buffer *buffer = pfile->buffer;
e60d6e
   location_t orig_line = pfile->line_table->highest_line;
e60d6e
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
e60d6e
 
e60d6e
-  while (*buffer->cur != '\n')
e60d6e
-    buffer->cur++;
e60d6e
+  if (!warn_bidi_p)
e60d6e
+    while (*buffer->cur != '\n')
e60d6e
+      buffer->cur++;
e60d6e
+  else
e60d6e
+    {
e60d6e
+      while (*buffer->cur != '\n'
e60d6e
+	     && *buffer->cur != bidi::utf8_start)
e60d6e
+	buffer->cur++;
e60d6e
+      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
e60d6e
+	{
e60d6e
+	  while (*buffer->cur != '\n')
e60d6e
+	    {
e60d6e
+	      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
e60d6e
+		{
e60d6e
+		  bidi::kind kind = get_bidi_utf8 (buffer->cur);
e60d6e
+		  maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
e60d6e
+					   /*ucn_p=*/false);
e60d6e
+		}
e60d6e
+	      buffer->cur++;
e60d6e
+	    }
e60d6e
+	  maybe_warn_bidi_on_close (pfile, buffer->cur);
e60d6e
+	}
e60d6e
+    }
e60d6e
 
e60d6e
   _cpp_process_line_notes (pfile, true);
e60d6e
   return orig_line != pfile->line_table->highest_line;
e60d6e
@@ -1346,11 +1700,13 @@ static const cppchar_t utf8_signifier = 0xC0;
e60d6e
 
e60d6e
 /* Returns TRUE if the sequence starting at buffer->cur is valid in
e60d6e
    an identifier.  FIRST is TRUE if this starts an identifier.  */
e60d6e
+
e60d6e
 static bool
e60d6e
 forms_identifier_p (cpp_reader *pfile, int first,
e60d6e
 		    struct normalize_state *state)
e60d6e
 {
e60d6e
   cpp_buffer *buffer = pfile->buffer;
e60d6e
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
e60d6e
 
e60d6e
   if (*buffer->cur == '$')
e60d6e
     {
e60d6e
@@ -1373,6 +1729,13 @@ forms_identifier_p (cpp_reader *pfile, int first,
e60d6e
       cppchar_t s;
e60d6e
       if (*buffer->cur >= utf8_signifier)
e60d6e
 	{
e60d6e
+	  if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
e60d6e
+	      && warn_bidi_p)
e60d6e
+	    {
e60d6e
+	      bidi::kind kind = get_bidi_utf8 (buffer->cur);
e60d6e
+	      maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
e60d6e
+				       /*ucn_p=*/false);
e60d6e
+	    }
e60d6e
 	  if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
e60d6e
 			       state, &s))
e60d6e
 	    return true;
e60d6e
@@ -1381,6 +1744,13 @@ forms_identifier_p (cpp_reader *pfile, int first,
e60d6e
 	       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
e60d6e
 	{
e60d6e
 	  buffer->cur += 2;
e60d6e
+	  if (warn_bidi_p)
e60d6e
+	    {
e60d6e
+	      bidi::kind kind = get_bidi_ucn (buffer->cur,
e60d6e
+					      buffer->cur[-1] == 'U');
e60d6e
+	      maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
e60d6e
+				       /*ucn_p=*/true);
e60d6e
+	    }
e60d6e
 	  if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
e60d6e
 			      state, &s, NULL, NULL))
e60d6e
 	    return true;
e60d6e
@@ -1489,6 +1859,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
e60d6e
   const uchar *cur;
e60d6e
   unsigned int len;
e60d6e
   unsigned int hash = HT_HASHSTEP (0, *base);
e60d6e
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
e60d6e
 
e60d6e
   cur = pfile->buffer->cur;
e60d6e
   if (! starts_ucn)
e60d6e
@@ -1512,6 +1883,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
e60d6e
 	    pfile->buffer->cur++;
e60d6e
 	  }
e60d6e
       } while (forms_identifier_p (pfile, false, nst));
e60d6e
+      if (warn_bidi_p)
e60d6e
+	maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
e60d6e
       result = _cpp_interpret_identifier (pfile, base,
e60d6e
 					  pfile->buffer->cur - base);
e60d6e
       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
e60d6e
@@ -1758,6 +2131,7 @@ static void
e60d6e
 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
e60d6e
 {
e60d6e
   const uchar *pos = base;
e60d6e
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
e60d6e
 
e60d6e
   /* 'tis a pity this information isn't passed down from the lexer's
e60d6e
      initial categorization of the token.  */
e60d6e
@@ -1994,8 +2368,15 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
e60d6e
 	  pos = base = pfile->buffer->cur;
e60d6e
 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
e60d6e
 	}
e60d6e
+      else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
e60d6e
+	       && warn_bidi_p)
e60d6e
+	maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1),
e60d6e
+				 /*ucn_p=*/false);
e60d6e
     }
e60d6e
 
e60d6e
+  if (warn_bidi_p)
e60d6e
+    maybe_warn_bidi_on_close (pfile, pos);
e60d6e
+
e60d6e
   if (CPP_OPTION (pfile, user_literals))
e60d6e
     {
e60d6e
       /* If a string format macro, say from inttypes.h, is placed touching
e60d6e
@@ -2090,15 +2471,27 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
e60d6e
   else
e60d6e
     terminator = '>', type = CPP_HEADER_NAME;
e60d6e
 
e60d6e
+  const bool warn_bidi_p = pfile->warn_bidi_p ();
e60d6e
   for (;;)
e60d6e
     {
e60d6e
       cppchar_t c = *cur++;
e60d6e
 
e60d6e
       /* In #include-style directives, terminators are not escapable.  */
e60d6e
       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
e60d6e
-	cur++;
e60d6e
+	{
e60d6e
+	  if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
e60d6e
+	    {
e60d6e
+	      bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
e60d6e
+	      maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
e60d6e
+	    }
e60d6e
+	  cur++;
e60d6e
+	}
e60d6e
       else if (c == terminator)
e60d6e
-	break;
e60d6e
+	{
e60d6e
+	  if (warn_bidi_p)
e60d6e
+	    maybe_warn_bidi_on_close (pfile, cur - 1);
e60d6e
+	  break;
e60d6e
+	}
e60d6e
       else if (c == '\n')
e60d6e
 	{
e60d6e
 	  cur--;
e60d6e
@@ -2115,6 +2508,11 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
e60d6e
 	}
e60d6e
       else if (c == '\0')
e60d6e
 	saw_NUL = true;
e60d6e
+      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
e60d6e
+	{
e60d6e
+	  bidi::kind kind = get_bidi_utf8 (cur - 1);
e60d6e
+	  maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
e60d6e
+	}
e60d6e
     }
e60d6e
 
e60d6e
   if (saw_NUL && !pfile->state.skipping)