From: Chris Lattner Date: Tue, 7 Jul 2009 17:09:54 +0000 (+0000) Subject: Convert the CharInfo table to be statically initialized, instead of dynamically initi... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=03b986660cc9ae7a08588f99913f2d91e359a65f;p=clang Convert the CharInfo table to be statically initialized, instead of dynamically initialized. Patch by Ryan Flynn! git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@74919 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 6dea5b7dde..974b6900b7 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -242,8 +242,6 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, // Character information. //===----------------------------------------------------------------------===// -static unsigned char CharInfo[256]; - enum { CHAR_HORZ_WS = 0x01, // ' ', '\t', '\f', '\v'. Note, no '\0' CHAR_VERT_WS = 0x02, // '\r', '\n' @@ -253,25 +251,98 @@ enum { CHAR_PERIOD = 0x20 // . }; +// Statically initialize CharInfo table based on ASCII character set +// Reference: FreeBSD 7.2 /usr/share/misc/ascii +static const unsigned char CharInfo[256] = +{ +// 0 NUL 1 SOH 2 STX 3 ETX +// 4 EOT 5 ENQ 6 ACK 7 BEL + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , +// 8 BS 9 HT 10 NL 11 VT +//12 NP 13 CR 14 SO 15 SI + 0 , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_HORZ_WS, + CHAR_HORZ_WS, CHAR_VERT_WS, 0 , 0 , +//16 DLE 17 DC1 18 DC2 19 DC3 +//20 DC4 21 NAK 22 SYN 23 ETB + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , +//24 CAN 25 EM 26 SUB 27 ESC +//28 FS 29 GS 30 RS 31 US + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , +//32 SP 33 ! 34 " 35 # +//36 $ 37 % 38 & 39 ' + CHAR_HORZ_WS, 0 , 0 , 0 , + 0 , 0 , 0 , 0 , +//40 ( 41 ) 42 * 43 + +//44 , 45 - 46 . 47 / + 0 , 0 , 0 , 0 , + 0 , 0 , CHAR_PERIOD , 0 , +//48 0 49 1 50 2 51 3 +//52 4 53 5 54 6 55 7 + CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , + CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , +//56 8 57 9 58 : 59 ; +//60 < 61 = 62 > 63 ? + CHAR_NUMBER , CHAR_NUMBER , 0 , 0 , + 0 , 0 , 0 , 0 , +//64 @ 65 A 66 B 67 C +//68 D 69 E 70 F 71 G + 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//72 H 73 I 74 J 75 K +//76 L 77 M 78 N 79 O + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//80 P 81 Q 82 R 83 S +//84 T 85 U 86 V 87 W + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//88 X 89 Y 90 Z 91 [ +//92 \ 93 ] 94 ^ 95 _ + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 , + 0 , 0 , 0 , CHAR_UNDER , +//96 ` 97 a 98 b 99 c +//100 d 101 e 102 f 103 g + 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//104 h 105 i 106 j 107 k +//108 l 109 m 110 n 111 o + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//112 p 113 q 114 r 115 s +//116 t 117 u 118 v 119 w + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , +//120 x 121 y 122 z 123 { +//124 | 125 } 126 ~ 127 DEL + CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 , + 0 , 0 , 0 , 0 +}; + static void InitCharacterInfo() { static bool isInited = false; if (isInited) return; - isInited = true; - - // Intiialize the CharInfo table. - // TODO: statically initialize this. - CharInfo[(int)' '] = CharInfo[(int)'\t'] = - CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS; - CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS; - - CharInfo[(int)'_'] = CHAR_UNDER; - CharInfo[(int)'.'] = CHAR_PERIOD; - for (unsigned i = 'a'; i <= 'z'; ++i) - CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER; + // check the statically-initialized CharInfo table + assert(CHAR_HORZ_WS == CharInfo[(int)' ']); + assert(CHAR_HORZ_WS == CharInfo[(int)'\t']); + assert(CHAR_HORZ_WS == CharInfo[(int)'\f']); + assert(CHAR_HORZ_WS == CharInfo[(int)'\v']); + assert(CHAR_VERT_WS == CharInfo[(int)'\n']); + assert(CHAR_VERT_WS == CharInfo[(int)'\r']); + assert(CHAR_UNDER == CharInfo[(int)'_']); + assert(CHAR_PERIOD == CharInfo[(int)'.']); + for (unsigned i = 'a'; i <= 'z'; ++i) { + assert(CHAR_LETTER == CharInfo[i]); + assert(CHAR_LETTER == CharInfo[i+'A'-'a']); + } for (unsigned i = '0'; i <= '9'; ++i) - CharInfo[i] = CHAR_NUMBER; + assert(CHAR_NUMBER == CharInfo[i]); + isInited = true; } + /// isIdentifierBody - Return true if this is the body character of an /// identifier, which is [a-zA-Z0-9_]. static inline bool isIdentifierBody(unsigned char c) {