// Character information.
//===----------------------------------------------------------------------===//
-static unsigned char CharInfo[256];
-
enum {
CHAR_HORZ_WS = 0x01, // ' ', '\t', '\f', '\v'. Note, no '\0'
CHAR_VERT_WS = 0x02, // '\r', '\n'
CHAR_PERIOD = 0x20 // .
};
+// Statically initialized CharInfo table for ASCII codes 0-127 (layout follows
+// FreeBSD 7.2 /usr/share/misc/ascii); entries 128-255 are implicitly zero.
+static const unsigned char CharInfo[256] =
+{ // Each pair of comment rows labels the eight codes in the two rows below it.
+// 0 NUL 1 SOH 2 STX 3 ETX
+// 4 EOT 5 ENQ 6 ACK 7 BEL
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+// 8 BS 9 HT ('\t') 10 NL ('\n') 11 VT ('\v')
+//12 NP ('\f') 13 CR ('\r') 14 SO 15 SI
+ 0 , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_HORZ_WS,
+ CHAR_HORZ_WS, CHAR_VERT_WS, 0 , 0 ,
+//16 DLE 17 DC1 18 DC2 19 DC3
+//20 DC4 21 NAK 22 SYN 23 ETB
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+//24 CAN 25 EM 26 SUB 27 ESC
+//28 FS 29 GS 30 RS 31 US
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+//32 SP (' ') 33 ! 34 " 35 #
+//36 $ 37 % 38 & 39 '
+ CHAR_HORZ_WS, 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+//40 ( 41 ) 42 * 43 +
+//44 , 45 - 46 . 47 /
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , CHAR_PERIOD , 0 ,
+//48 0 49 1 50 2 51 3
+//52 4 53 5 54 6 55 7
+ CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
+ CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
+//56 8 57 9 58 : 59 ;
+//60 < 61 = 62 > 63 ?
+ CHAR_NUMBER , CHAR_NUMBER , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+//64 @ 65 A 66 B 67 C
+//68 D 69 E 70 F 71 G
+ 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//72 H 73 I 74 J 75 K
+//76 L 77 M 78 N 79 O
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//80 P 81 Q 82 R 83 S
+//84 T 85 U 86 V 87 W
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//88 X 89 Y 90 Z 91 [
+//92 \ 93 ] 94 ^ 95 _
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,
+ 0 , 0 , 0 , CHAR_UNDER ,
+//96 ` 97 a 98 b 99 c
+//100 d 101 e 102 f 103 g
+ 0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//104 h 105 i 106 j 107 k
+//108 l 109 m 110 n 111 o
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//112 p 113 q 114 r 115 s
+//116 t 117 u 118 v 119 w
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//120 x 121 y 122 z 123 {
+//124 | 125 } 126 ~ 127 DEL
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,
+ 0 , 0 , 0 , 0
+};
+
static void InitCharacterInfo() {
static bool isInited = false;
if (isInited) return;
- isInited = true;
-
- // Intiialize the CharInfo table.
- // TODO: statically initialize this.
- CharInfo[(int)' '] = CharInfo[(int)'\t'] =
- CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS;
- CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS;
-
- CharInfo[(int)'_'] = CHAR_UNDER;
- CharInfo[(int)'.'] = CHAR_PERIOD;
- for (unsigned i = 'a'; i <= 'z'; ++i)
- CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER;
+ // Sanity-check the statically initialized CharInfo table: each character below must carry exactly its expected class value.
+ assert(CHAR_HORZ_WS == CharInfo[(int)' ']);
+ assert(CHAR_HORZ_WS == CharInfo[(int)'\t']);
+ assert(CHAR_HORZ_WS == CharInfo[(int)'\f']);
+ assert(CHAR_HORZ_WS == CharInfo[(int)'\v']);
+ assert(CHAR_VERT_WS == CharInfo[(int)'\n']);
+ assert(CHAR_VERT_WS == CharInfo[(int)'\r']);
+ assert(CHAR_UNDER == CharInfo[(int)'_']);
+ assert(CHAR_PERIOD == CharInfo[(int)'.']);
+ for (unsigned i = 'a'; i <= 'z'; ++i) {
+ assert(CHAR_LETTER == CharInfo[i]); // lower-case letter
+ assert(CHAR_LETTER == CharInfo[i+'A'-'a']); // the corresponding upper-case letter
+ }
for (unsigned i = '0'; i <= '9'; ++i)
- CharInfo[i] = CHAR_NUMBER;
+ assert(CHAR_NUMBER == CharInfo[i]);
+ isInited = true; // set only after the checks; later calls return early at the top
}
+
/// isIdentifierBody - Return true if this is the body character of an
/// identifier, which is [a-zA-Z0-9_].
static inline bool isIdentifierBody(unsigned char c) {