From 6cc38f2df36ff977b723620bac20a6435e132e32 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 8 Oct 2019 04:39:52 +0000 Subject: [PATCH] [IA] Recognize hexadecimal escape sequences Summary: Implement support for hexadecimal escape sequences to match how GNU 'as' handles them. I.e., read all hexadecimal characters and truncate to the lower 8 bits. Reviewers: nickdesaulniers, jcai19 Subscribers: llvm-commits, hiraditya Tags: #llvm Differential Revision: https://reviews.llvm.org/D68598 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374018 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 18 +++++++++++++++++- test/MC/AsmParser/directive_ascii.s | 5 +++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 381bf964161..ca6bc252a0d 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -2914,11 +2914,27 @@ bool AsmParser::parseEscapedString(std::string &Data) { } // Recognize escaped characters. Note that this escape semantics currently - // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes. + // loosely follows Darwin 'as'. ++i; if (i == e) return TokError("unexpected backslash at end of string"); + // Recognize hex sequences similarly to GNU 'as'. + if (Str[i] == 'x' || Str[i] == 'X') { + size_t length = Str.size(); + if (i + 1 >= length || !isHexDigit(Str[i + 1])) + return TokError("invalid hexadecimal escape sequence"); + + // Consume hex characters. GNU 'as' reads all hexadecimal characters and + // then truncates to the lower 8 bits. Seems reasonable. + unsigned Value = 0; + while (i + 1 < length && isHexDigit(Str[i + 1])) + Value = Value * 16 + hexDigitValue(Str[++i]); + + Data += (unsigned char)(Value & 0xFF); + continue; + } + // Recognize octal sequences. if ((unsigned)(Str[i] - '0') <= 7) { // Consume up to three octal characters. 
diff --git a/test/MC/AsmParser/directive_ascii.s b/test/MC/AsmParser/directive_ascii.s index a7ba7bbd5da..604f9721bcc 100644 --- a/test/MC/AsmParser/directive_ascii.s +++ b/test/MC/AsmParser/directive_ascii.s @@ -39,3 +39,8 @@ TEST5: # CHECK: .byte 0 TEST6: .string "B", "C" + +# CHECK: TEST7: +# CHECK: .ascii "dk" +TEST7: + .ascii "\x64\Xa6B" -- 2.40.0