From ff8b0ae836dbfa06a5fb0f25da70d47b8ff5a4cb Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Fri, 2 Jan 2009 07:27:39 +0000 Subject: [PATCH] Gas parser: Move instruction/prefix lookup from tokenizer to parser. Use the single token of lookahead to detect the label case. This is significantly cleaner as it removes the special-casing of labels in the tokenizer (so there is just a single identifier rule) and removes the INSTDIR parser state (as this was only used to prevent instruction lookup within other locations). Also, ID and LABEL now provide the string length to the parser. We needed to do this for ID due to parse_check_insnprefix() needing the length, so both were folded in for consistency. svn path=/trunk/yasm/; revision=2164 --- modules/parsers/gas/gas-parse.c | 130 ++++++++++++++++++------------- modules/parsers/gas/gas-parser.h | 12 +-- modules/parsers/gas/gas-token.re | 74 +++++------------- 3 files changed, 98 insertions(+), 118 deletions(-) diff --git a/modules/parsers/gas/gas-parse.c b/modules/parsers/gas/gas-parse.c index e3bd6f7a..41b1888d 100644 --- a/modules/parsers/gas/gas-parse.c +++ b/modules/parsers/gas/gas-parse.c @@ -32,6 +32,7 @@ RCSID("$Id$"); #include +#include #include #include @@ -108,14 +109,9 @@ destroy_curtok_(yasm_parser_gas *parser_gas) break; case ID: case LABEL: - yasm_xfree(curval.str_val); - break; case STRING: yasm_xfree(curval.str.contents); break; - case INSN: - yasm_bc_destroy(curval.bc); - break; default: break; } @@ -156,8 +152,6 @@ expect_(yasm_parser_gas *parser_gas, int token) case INTNUM: str = "integer"; break; case FLTNUM: str = "floating point value"; break; case STRING: str = "string"; break; - case INSN: str = "instruction"; break; - case PREFIX: str = "instruction prefix"; break; case REG: str = "register"; break; case REGGROUP: str = "register group"; break; case SEGREG: str = "segment register"; break; @@ -205,7 +199,6 @@ parse_line(yasm_parser_gas *parser_gas) return dir->handler(parser_gas, dir->param); } - parser_gas->state = INSTDIR; get_next_token(); /* ID */ if (curtok == ':') { /* Label */ @@ -939,15 +932,38 @@ static yasm_bytecode * parse_instr(yasm_parser_gas *parser_gas) { yasm_bytecode *bc; + char *id; + size_t id_len; + uintptr_t prefix; - switch (curtok) { - case INSN: + if (curtok != ID) + return NULL; + + id = ID_val; + id_len = ID_len; + + /* instructions/prefixes must start with a letter */ + if (!isalpha(id[0])) + return NULL; + + /* check to be sure it's not a label */ + get_peek_token(parser_gas); + if (parser_gas->peek_token == ':') + return NULL; + + switch (yasm_arch_parse_check_insnprefix + (p_object->arch, ID_val, ID_len, cur_line, &bc, &prefix)) { + case YASM_ARCH_INSN: { yasm_insn *insn; - bc = INSN_val; + + /* Propagate errors in case we got a warning from the arch */ + yasm_errwarn_propagate(parser_gas->errwarns, cur_line); + insn = yasm_bc_get_insn(bc); - get_next_token(); + yasm_xfree(id); + get_next_token(); /* ID */ if (is_eol()) return bc; /* no operands */ @@ -972,16 +988,21 @@ parse_instr(yasm_parser_gas *parser_gas) } return bc; } - case PREFIX: + case YASM_ARCH_PREFIX: { - uintptr_t prefix = PREFIX_val; - get_next_token(); /* PREFIX */ + /* Propagate errors in case we got a warning from the arch */ + yasm_errwarn_propagate(parser_gas->errwarns, cur_line); + + yasm_xfree(id); + get_next_token(); /* ID */ bc = parse_instr(parser_gas); if (!bc) bc = yasm_arch_create_empty_insn(p_object->arch, cur_line); yasm_insn_add_prefix(yasm_bc_get_insn(bc), prefix); return bc; } +#if 0 + /* TODO */ case SEGREG: { uintptr_t segreg = SEGREG_val; @@ -991,6 +1012,7 @@ parse_instr(yasm_parser_gas *parser_gas) bc = yasm_arch_create_empty_insn(p_object->arch, cur_line); yasm_insn_add_seg_prefix(yasm_bc_get_insn(bc), segreg); } +#endif default: return NULL; } @@ -1652,59 +1674,59 @@ gas_parser_dir_fill(yasm_parser_gas *parser_gas, /*@only@*/ yasm_expr *repeat, static dir_lookup dirs_static[] = { /* FIXME: Whether this is power-of-two or not depends on arch and objfmt. */ - {".align", dir_align, 0, INSTDIR}, - {".p2align", dir_align, 1, INSTDIR}, - {".balign", dir_align, 0, INSTDIR}, - {".org", dir_org, 0, INSTDIR}, + {".align", dir_align, 0, INITIAL}, + {".p2align", dir_align, 1, INITIAL}, + {".balign", dir_align, 0, INITIAL}, + {".org", dir_org, 0, INITIAL}, /* data visibility directives */ - {".local", dir_local, 0, INSTDIR}, - {".comm", dir_comm, 0, INSTDIR}, - {".lcomm", dir_comm, 1, INSTDIR}, + {".local", dir_local, 0, INITIAL}, + {".comm", dir_comm, 0, INITIAL}, + {".lcomm", dir_comm, 1, INITIAL}, /* integer data declaration directives */ - {".byte", dir_data, 1, INSTDIR}, - {".2byte", dir_data, 2, INSTDIR}, - {".4byte", dir_data, 4, INSTDIR}, - {".8byte", dir_data, 8, INSTDIR}, - {".16byte", dir_data, 16, INSTDIR}, + {".byte", dir_data, 1, INITIAL}, + {".2byte", dir_data, 2, INITIAL}, + {".4byte", dir_data, 4, INITIAL}, + {".8byte", dir_data, 8, INITIAL}, + {".16byte", dir_data, 16, INITIAL}, /* TODO: These should depend on arch */ - {".short", dir_data, 2, INSTDIR}, - {".int", dir_data, 4, INSTDIR}, - {".long", dir_data, 4, INSTDIR}, - {".hword", dir_data, 2, INSTDIR}, - {".quad", dir_data, 8, INSTDIR}, - {".octa", dir_data, 16, INSTDIR}, + {".short", dir_data, 2, INITIAL}, + {".int", dir_data, 4, INITIAL}, + {".long", dir_data, 4, INITIAL}, + {".hword", dir_data, 2, INITIAL}, + {".quad", dir_data, 8, INITIAL}, + {".octa", dir_data, 16, INITIAL}, /* XXX: At least on x86, this is 2 bytes */ - {".value", dir_data, 2, INSTDIR}, + {".value", dir_data, 2, INITIAL}, /* ASCII data declaration directives */ - {".ascii", dir_ascii, 0, INSTDIR}, /* no terminating zero */ - {".asciz", dir_ascii, 1, INSTDIR}, /* add terminating zero */ - {".string", dir_ascii, 1, INSTDIR}, /* add terminating zero */ + {".ascii", dir_ascii, 0, INITIAL}, /* no terminating zero */ + {".asciz", dir_ascii, 1, INITIAL}, /* add terminating zero */ + {".string", dir_ascii, 1, INITIAL}, /* add terminating zero */ /* LEB128 integer data declaration directives */ - {".sleb128", dir_leb128, 1, INSTDIR}, /* signed */ - {".uleb128", dir_leb128, 0, INSTDIR}, /* unsigned */ + {".sleb128", dir_leb128, 1, INITIAL}, /* signed */ + {".uleb128", dir_leb128, 0, INITIAL}, /* unsigned */ /* floating point data declaration directives */ - {".float", dir_data, 4, INSTDIR}, - {".single", dir_data, 4, INSTDIR}, - {".double", dir_data, 8, INSTDIR}, - {".tfloat", dir_data, 10, INSTDIR}, + {".float", dir_data, 4, INITIAL}, + {".single", dir_data, 4, INITIAL}, + {".double", dir_data, 8, INITIAL}, + {".tfloat", dir_data, 10, INITIAL}, /* section directives */ {".bss", dir_bss_section, 0, INITIAL}, {".data", dir_data_section, 0, INITIAL}, {".text", dir_text_section, 0, INITIAL}, {".section", dir_section, 0, SECTION_DIRECTIVE}, /* macro directives */ - {".rept", dir_rept, 0, INSTDIR}, - {".endr", dir_endr, 0, INSTDIR}, + {".rept", dir_rept, 0, INITIAL}, + {".endr", dir_endr, 0, INITIAL}, /* empty space/fill directives */ - {".skip", dir_skip, 0, INSTDIR}, - {".space", dir_skip, 0, INSTDIR}, - {".fill", dir_fill, 0, INSTDIR}, - {".zero", dir_zero, 0, INSTDIR}, + {".skip", dir_skip, 0, INITIAL}, + {".space", dir_skip, 0, INITIAL}, + {".fill", dir_fill, 0, INITIAL}, + {".zero", dir_zero, 0, INITIAL}, /* other directives */ - {".equ", dir_equ, 0, INSTDIR}, - {".file", dir_file, 0, INSTDIR}, - {".line", dir_line, 0, INSTDIR}, - {".set", dir_equ, 0, INSTDIR} + {".equ", dir_equ, 0, INITIAL}, + {".file", dir_file, 0, INITIAL}, + {".line", dir_line, 0, INITIAL}, + {".set", dir_equ, 0, INITIAL} }; static void @@ -1722,7 +1744,7 @@ gas_parser_parse(yasm_parser_gas *parser_gas) word.name = ".word"; word.handler = dir_data; word.param = yasm_arch_wordsize(p_object->arch)/8; - word.newstate = INSTDIR; + word.newstate = INITIAL; /* Create directive lookup */ parser_gas->dirs = HAMT_create(1, yasm_internal_error_); diff --git a/modules/parsers/gas/gas-parser.h b/modules/parsers/gas/gas-parser.h index 70704b53..3463563f 100644 --- a/modules/parsers/gas/gas-parser.h +++ b/modules/parsers/gas/gas-parser.h @@ -38,8 +38,6 @@ enum tokentype { INTNUM = 258, FLTNUM, STRING, - INSN, - PREFIX, REG, REGGROUP, SEGREG, @@ -55,7 +53,6 @@ enum tokentype { typedef union { unsigned int int_info; - char *str_val; yasm_intnum *intn; yasm_floatnum *flt; yasm_bytecode *bc; @@ -91,7 +88,6 @@ enum gas_parser_state { INITIAL, COMMENT, SECTION_DIRECTIVE, - INSTDIR, NASM_FILENAME }; @@ -164,14 +160,14 @@ typedef struct yasm_parser_gas { #define INTNUM_val (curval.intn) #define FLTNUM_val (curval.flt) #define STRING_val (curval.str) -#define INSN_val (curval.bc) -#define PREFIX_val (curval.arch_data) #define REG_val (curval.arch_data) #define REGGROUP_val (curval.arch_data) #define SEGREG_val (curval.arch_data) #define TARGETMOD_val (curval.arch_data) -#define ID_val (curval.str_val) -#define LABEL_val (curval.str_val) +#define ID_val (curval.str.contents) +#define ID_len (curval.str.len) +#define LABEL_val (curval.str.contents) +#define LABEL_len (curval.str.len) #define cur_line (yasm_linemap_get_current(parser_gas->linemap)) diff --git a/modules/parsers/gas/gas-token.re b/modules/parsers/gas/gas-token.re index 0ac61f12..0a68af59 100644 --- a/modules/parsers/gas/gas-token.re +++ b/modules/parsers/gas/gas-token.re @@ -393,9 +393,10 @@ scan: RETURN(s->tok[0]); } - /* label or maybe directive */ - [_.][a-zA-Z0-9_$.]* { - lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); + /* identifier */ + [a-zA-Z_.][a-zA-Z0-9_$.]* { + lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN); + lvalp->str.len = TOKLEN; RETURN(ID); } @@ -429,75 +430,35 @@ scan: RETURN(REG); } - /* label */ - [a-zA-Z][a-zA-Z0-9_$.]* ws* ':' { - /* strip off colon and any whitespace */ - count = TOKLEN-1; - while (s->tok[count] == ' ' || s->tok[count] == '\t' - || s->tok[count] == '\r') - count--; - /* Just an identifier, return as such. */ - lvalp->str_val = yasm__xstrndup(TOK, count); - RETURN(LABEL); - } - /* local label */ [0-9] ':' { /* increment label index */ parser_gas->local[s->tok[0]-'0']++; /* build local label name */ - lvalp->str_val = yasm_xmalloc(30); - sprintf(lvalp->str_val, "L%c\001%lu", s->tok[0], - parser_gas->local[s->tok[0]-'0']); + lvalp->str.contents = yasm_xmalloc(30); + lvalp->str.len = + sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0], + parser_gas->local[s->tok[0]-'0']); RETURN(LABEL); } /* local label forward reference */ [0-9] 'f' { /* build local label name */ - lvalp->str_val = yasm_xmalloc(30); - sprintf(lvalp->str_val, "L%c\001%lu", s->tok[0], - parser_gas->local[s->tok[0]-'0']+1); + lvalp->str.contents = yasm_xmalloc(30); + lvalp->str.len = + sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0], + parser_gas->local[s->tok[0]-'0']+1); RETURN(ID); } /* local label backward reference */ [0-9] 'b' { /* build local label name */ - lvalp->str_val = yasm_xmalloc(30); - sprintf(lvalp->str_val, "L%c\001%lu", s->tok[0], - parser_gas->local[s->tok[0]-'0']); - RETURN(ID); - } - - /* identifier that may be an instruction, etc. */ - [a-zA-Z][a-zA-Z0-9_$.]* { - /* Can only be an instruction/prefix when not inside an - * instruction or directive. - */ - if (parser_gas->state != INSTDIR) { - uintptr_t prefix; - savech = s->tok[TOKLEN]; - s->tok[TOKLEN] = '\0'; - switch (yasm_arch_parse_check_insnprefix - (p_object->arch, TOK, TOKLEN, cur_line, &lvalp->bc, - &prefix)) { - case YASM_ARCH_INSN: - s->tok[TOKLEN] = savech; - parser_gas->state = INSTDIR; - RETURN(INSN); - case YASM_ARCH_PREFIX: - lvalp->arch_data = prefix; - s->tok[TOKLEN] = savech; - RETURN(PREFIX); - default: - s->tok[TOKLEN] = savech; - } - } - /* Propagate errors in case we got a warning from the arch */ - yasm_errwarn_propagate(parser_gas->errwarns, cur_line); - /* Just an identifier, return as such. */ - lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); + lvalp->str.contents = yasm_xmalloc(30); + lvalp->str.len = + sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0], + parser_gas->local[s->tok[0]-'0']); RETURN(ID); } @@ -560,7 +521,8 @@ section_directive: /*!re2c [a-zA-Z0-9_$.-]+ { - lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); + lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN); + lvalp->str.len = TOKLEN; parser_gas->state = INITIAL; RETURN(ID); } -- 2.40.0