From: Peter Johnson Date: Fri, 7 Dec 2001 03:32:05 +0000 (-0000) Subject: Start work on optimizer module. Moved all "post-parser-finalization" code X-Git-Tag: v0.1.0~142 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2727869bd84b0b56930a996aca705783b86fcf69;p=yasm Start work on optimizer module. Moved all "post-parser-finalization" code into the optimizer stage, where it belongs. Naming has changed, but most implementations have not been brought up to date to do what they say they do. svn path=/trunk/yasm/; revision=388 --- diff --git a/frontends/yasm/yasm.c b/frontends/yasm/yasm.c index e78e1dfd..e8c24538 100644 --- a/frontends/yasm/yasm.c +++ b/frontends/yasm/yasm.c @@ -36,6 +36,7 @@ #include "objfmt.h" #include "preproc.h" #include "parser.h" +#include "optimizer.h" #include "arch.h" @@ -189,9 +190,9 @@ main(int argc, char *argv[]) indent_level--; symrec_parser_finalize(); - sections_parser_finalize(sections); + basic_optimizer.optimize(sections); - fprintf(obj, "\nSections after post-parser-finalization:\n"); + fprintf(obj, "\nSections after optimization:\n"); indent_level++; sections_print(obj, sections); indent_level--; diff --git a/libyasm/arch.h b/libyasm/arch.h index 9dd71fb8..ece57e1b 100644 --- a/libyasm/arch.h +++ b/libyasm/arch.h @@ -38,7 +38,11 @@ struct arch { void (*bc_delete) (bytecode *bc); void (*bc_print) (FILE *f, const bytecode *bc); - void (*bc_parser_finalize) (bytecode *bc); + + /* See bytecode.h comments on bc_calc_len() */ + int (*bc_calc_len) (bytecode *bc, /*@only@*/ /*@null@*/ + intnum *(*resolve_label) (section *sect, /*@null@*/ + bytecode *bc)); } bc; }; diff --git a/libyasm/bytecode.c b/libyasm/bytecode.c index d5da45df..149f71d4 100644 --- a/libyasm/bytecode.c +++ b/libyasm/bytecode.c @@ -240,13 +240,6 @@ bc_delete(bytecode *bc) xfree(bc); } -int -bc_get_offset(/*@unused@*/ section *sect, /*@unused@*/ bytecode *bc, - /*@unused@*/ unsigned long *ret_val) -{ - return 0; /* TODO */ -} - void bc_print(FILE *f, const bytecode *bc) { @@ -312,20 +305,26 @@ bc_print(FILE *f, const bytecode *bc) fprintf(f, "%*sOffset=%lx\n", indent_level, "", bc->offset); } -void -bc_parser_finalize(bytecode *bc) +int +bc_calc_len(bytecode *bc, + intnum *(*resolve_label) (section *sect, /*@null@*/ bytecode *bc)) { switch (bc->type) { case BC_EMPTY: - /* FIXME: delete it (probably in bytecodes_ level, not here */ - InternalError(_("got empty bytecode in parser_finalize")); + InternalError(_("got empty bytecode in bc_calc_len")); + case BC_DATA: + break; + case BC_RESERVE: + break; + case BC_INCBIN: + break; default: if (bc->type < cur_arch->bc.type_max) - cur_arch->bc.bc_parser_finalize(bc); + return cur_arch->bc.bc_calc_len(bc, resolve_label); else InternalError(_("Unknown bytecode type")); - break; } + return 0; } void @@ -369,13 +368,16 @@ bcs_print(FILE *f, const bytecodehead *headp) } } -void -bcs_parser_finalize(bytecodehead *headp) +int +bcs_traverse(bytecodehead *headp, void *d, + int (*func) (bytecode *bc, /*@null@*/ void *d)) { bytecode *cur; STAILQ_FOREACH(cur, headp, link) - bc_parser_finalize(cur); + if (func(cur, d) == 0) + return 0; + return 1; } dataval * diff --git a/libyasm/bytecode.h b/libyasm/bytecode.h index 4b223cbb..e8607c12 100644 --- a/libyasm/bytecode.h +++ b/libyasm/bytecode.h @@ -56,18 +56,19 @@ void bc_set_multiple(bytecode *bc, /*@keep@*/ expr *e); void bc_delete(/*@only@*/ /*@null@*/ bytecode *bc); -/* Gets the offset of the bytecode specified by bc if possible. - * Return value is IF POSSIBLE, not the value. - */ -int bc_get_offset(section *sect, bytecode *bc, - /*@out@*/ unsigned long *ret_val); - void bc_print(FILE *f, const bytecode *bc); -void bc_parser_finalize(bytecode *bc); +/* Calculates length of bytecode, saving in bc structure. + * Returns whether the length is the minimum possible (1=yes, 0=no). + * resolve_label is the function used to determine the value (offset) of a + * in-file label (eg, not an EXTERN variable, which is indeterminate). + */ +int bc_calc_len(bytecode *bc, /*@only@*/ /*@null@*/ + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)); -/* void bytecodes_initialize(bytecodehead *headp); */ -#define bytecodes_initialize(headp) STAILQ_INIT(headp) +/* void bcs_initialize(bytecodehead *headp); */ +#define bcs_initialize(headp) STAILQ_INIT(headp) void bcs_delete(bytecodehead *headp); @@ -83,7 +84,8 @@ void bcs_delete(bytecodehead *headp); void bcs_print(FILE *f, const bytecodehead *headp); -void bcs_parser_finalize(bytecodehead *headp); +int bcs_traverse(bytecodehead *headp, /*@null@*/ void *d, + int (*func) (bytecode *bc, /*@null@*/ void *d)); dataval *dv_new_expr(/*@keep@*/ expr *expn); dataval *dv_new_float(/*@keep@*/ floatnum *flt); diff --git a/libyasm/optimizer.h b/libyasm/optimizer.h index cb4aae10..23a465e2 100644 --- a/libyasm/optimizer.h +++ b/libyasm/optimizer.h @@ -35,6 +35,7 @@ struct optimizer { * This function takes the unoptimized linked list of sections and returns * an optimized linked list of sections ready for output to an object file. */ + sectionhead *(*optimize) (sectionhead *sections); }; /* Available optimizers */ diff --git a/libyasm/section.c b/libyasm/section.c index b791f9d9..f0c2f0d6 100644 --- a/libyasm/section.c +++ b/libyasm/section.c @@ -112,7 +112,7 @@ sections_switch_general(sectionhead *headp, const char *name, void *of_data, s->type = SECTION_GENERAL; s->data.general.name = xstrdup(name); s->data.general.of_data = of_data; - bytecodes_initialize(&s->bc); + bcs_initialize(&s->bc); s->opt_flags = 0; s->res_only = res_only; @@ -133,7 +133,7 @@ sections_switch_absolute(sectionhead *headp, expr *start) s->type = SECTION_ABSOLUTE; s->data.start = start; - bytecodes_initialize(&s->bc); + bcs_initialize(&s->bc); s->opt_flags = 0; s->res_only = 1; @@ -187,13 +187,16 @@ sections_print(FILE *f, const sectionhead *headp) } } -void -sections_parser_finalize(sectionhead *headp) +int +sections_traverse(sectionhead *headp, /*@null@*/ void *d, + int (*func) (section *sect, /*@null@*/ void *d)) { section *cur; STAILQ_FOREACH(cur, headp, link) - bcs_parser_finalize(&cur->bc); + if (func(cur, d) == 0) + return 0; + return 1; } bytecodehead * diff --git a/libyasm/section.h b/libyasm/section.h index 011d1c75..153490ec 100644 --- a/libyasm/section.h +++ b/libyasm/section.h @@ -45,7 +45,8 @@ void sections_delete(sectionhead *headp); void sections_print(FILE *f, const sectionhead *headp); -void sections_parser_finalize(sectionhead *headp); +int sections_traverse(sectionhead *headp, /*@null@*/ void *d, + int (*func) (section *sect, /*@null@*/ void *d)); /*@dependent@*/ bytecodehead *section_get_bytecodes(section *sect); diff --git a/modules/arch/x86/arch.c b/modules/arch/x86/arch.c index fc2ebd9c..06da214d 100644 --- a/modules/arch/x86/arch.c +++ b/modules/arch/x86/arch.c @@ -38,6 +38,6 @@ arch x86_arch = { X86_BYTECODE_TYPE_MAX, x86_bc_delete, x86_bc_print, - x86_bc_parser_finalize + x86_bc_calc_len } }; diff --git a/modules/arch/x86/bytecode.c b/modules/arch/x86/bytecode.c index 78edcb4a..a4237713 100644 --- a/modules/arch/x86/bytecode.c +++ b/modules/arch/x86/bytecode.c @@ -460,8 +460,10 @@ x86_bc_print(FILE *f, const bytecode *bc) } } -static void -x86_bc_parser_finalize_insn(x86_insn *insn) +static int +x86_bc_calc_len_insn(x86_insn *insn, /*@only@*/ /*@null@*/ + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)) { effaddr *ea = insn->ea; x86_effaddr_data *ead = ea_get_data(ea); @@ -482,7 +484,7 @@ x86_bc_parser_finalize_insn(x86_insn *insn) ea->nosplit, &ea->len, &ead->modrm, &ead->valid_modrm, &ead->need_modrm, &ead->sib, &ead->valid_sib, &ead->need_sib)) - return; /* failed, don't bother checking rest of insn */ + return 0; /* failed, don't bother checking rest of insn */ } } @@ -513,21 +515,23 @@ x86_bc_parser_finalize_insn(x86_insn *insn) } } - + return 0; } -void -x86_bc_parser_finalize(bytecode *bc) +int +x86_bc_calc_len(bytecode *bc, + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)) { x86_insn *insn; switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_data(bc); - x86_bc_parser_finalize_insn(insn); - break; + return x86_bc_calc_len_insn(insn, resolve_label); default: break; } + return 0; } diff --git a/modules/arch/x86/x86-int.h b/modules/arch/x86/x86-int.h index 50d3bcce..a75c9995 100644 --- a/modules/arch/x86/x86-int.h +++ b/modules/arch/x86/x86-int.h @@ -96,7 +96,9 @@ typedef struct x86_jmprel { void x86_bc_delete(bytecode *bc); void x86_bc_print(FILE *f, const bytecode *bc); -void x86_bc_parser_finalize(bytecode *bc); +int x86_bc_calc_len(bytecode *bc, /*@only@*/ /*@null@*/ + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)); int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, unsigned char nosplit, unsigned char *displen, diff --git a/modules/arch/x86/x86arch.c b/modules/arch/x86/x86arch.c index fc2ebd9c..06da214d 100644 --- a/modules/arch/x86/x86arch.c +++ b/modules/arch/x86/x86arch.c @@ -38,6 +38,6 @@ arch x86_arch = { X86_BYTECODE_TYPE_MAX, x86_bc_delete, x86_bc_print, - x86_bc_parser_finalize + x86_bc_calc_len } }; diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c index 78edcb4a..a4237713 100644 --- a/modules/arch/x86/x86bc.c +++ b/modules/arch/x86/x86bc.c @@ -460,8 +460,10 @@ x86_bc_print(FILE *f, const bytecode *bc) } } -static void -x86_bc_parser_finalize_insn(x86_insn *insn) +static int +x86_bc_calc_len_insn(x86_insn *insn, /*@only@*/ /*@null@*/ + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)) { effaddr *ea = insn->ea; x86_effaddr_data *ead = ea_get_data(ea); @@ -482,7 +484,7 @@ x86_bc_parser_finalize_insn(x86_insn *insn) ea->nosplit, &ea->len, &ead->modrm, &ead->valid_modrm, &ead->need_modrm, &ead->sib, &ead->valid_sib, &ead->need_sib)) - return; /* failed, don't bother checking rest of insn */ + return 0; /* failed, don't bother checking rest of insn */ } } @@ -513,21 +515,23 @@ x86_bc_parser_finalize_insn(x86_insn *insn) } } - + return 0; } -void -x86_bc_parser_finalize(bytecode *bc) +int +x86_bc_calc_len(bytecode *bc, + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)) { x86_insn *insn; switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_data(bc); - x86_bc_parser_finalize_insn(insn); - break; + return x86_bc_calc_len_insn(insn, resolve_label); default: break; } + return 0; } diff --git a/modules/optimizers/basic/basic-optimizer.c b/modules/optimizers/basic/basic-optimizer.c index d22e6af4..8a14b1f4 100644 --- a/modules/optimizers/basic/basic-optimizer.c +++ b/modules/optimizers/basic/basic-optimizer.c @@ -22,11 +22,78 @@ #include "util.h" RCSID("$IdPath$"); +#include "bytecode.h" +#include "section.h" + +#include "bc-int.h" + #include "optimizer.h" +#define SECTFLAG_NONE 0 +#define SECTFLAG_INPROGRESS (1<<0) +#define SECTFLAG_DONE (1<<1) + +#define BCFLAG_NONE 0 +#define BCFLAG_INPROGRESS (1<<0) +#define BCFLAG_DONE (1<<1) + +static /*@only@*/ /*@null@*/ intnum * +basic_optimize_resolve_label(section *sect, bytecode *bc) +{ + unsigned long flags; + + flags = section_get_opt_flags(sect); + + return NULL; +} + +static int +basic_optimize_bytecode(bytecode *bc, /*@unused@*/ /*@null@*/ void *d) +{ + bc->opt_flags = BCFLAG_INPROGRESS; + + bc_calc_len(bc, basic_optimize_resolve_label); + + bc->opt_flags = BCFLAG_DONE; + + return 1; +} + +static int +basic_optimize_section(section *sect, /*@unused@*/ /*@null@*/ void *d) +{ + section_set_opt_flags(sect, SECTFLAG_INPROGRESS); + + bcs_traverse(section_get_bytecodes(sect), NULL, basic_optimize_bytecode); + + section_set_opt_flags(sect, SECTFLAG_DONE); + + return 1; +} + +static sectionhead * +basic_optimize(sectionhead *sections) +{ + /* Optimization process: (essentially NASM's pass 1) + * Determine the size of all bytecodes. + * Check "critical" expressions (must be computable on the first pass, + * i.e. depend only on symbols before it). + * Differences from NASM: + * - right-hand side of EQU is /not/ a critical expr (as the entire file + * has already been parsed, we know all their values at this point). + * - not strictly top->bottom scanning; we scan through a section and + * hop to other sections as necessary. + */ + sections_traverse(sections, NULL, basic_optimize_section); + + /* NASM's pass 2 is output, so we just return. */ + return sections; +} + /* Define optimizer structure -- see optimizer.h for details */ optimizer basic_optimizer = { "Only the most basic optimizations", - "basic" + "basic", + basic_optimize }; diff --git a/modules/optimizers/basic/optimizer.c b/modules/optimizers/basic/optimizer.c index d22e6af4..8a14b1f4 100644 --- a/modules/optimizers/basic/optimizer.c +++ b/modules/optimizers/basic/optimizer.c @@ -22,11 +22,78 @@ #include "util.h" RCSID("$IdPath$"); +#include "bytecode.h" +#include "section.h" + +#include "bc-int.h" + #include "optimizer.h" +#define SECTFLAG_NONE 0 +#define SECTFLAG_INPROGRESS (1<<0) +#define SECTFLAG_DONE (1<<1) + +#define BCFLAG_NONE 0 +#define BCFLAG_INPROGRESS (1<<0) +#define BCFLAG_DONE (1<<1) + +static /*@only@*/ /*@null@*/ intnum * +basic_optimize_resolve_label(section *sect, bytecode *bc) +{ + unsigned long flags; + + flags = section_get_opt_flags(sect); + + return NULL; +} + +static int +basic_optimize_bytecode(bytecode *bc, /*@unused@*/ /*@null@*/ void *d) +{ + bc->opt_flags = BCFLAG_INPROGRESS; + + bc_calc_len(bc, basic_optimize_resolve_label); + + bc->opt_flags = BCFLAG_DONE; + + return 1; +} + +static int +basic_optimize_section(section *sect, /*@unused@*/ /*@null@*/ void *d) +{ + section_set_opt_flags(sect, SECTFLAG_INPROGRESS); + + bcs_traverse(section_get_bytecodes(sect), NULL, basic_optimize_bytecode); + + section_set_opt_flags(sect, SECTFLAG_DONE); + + return 1; +} + +static sectionhead * +basic_optimize(sectionhead *sections) +{ + /* Optimization process: (essentially NASM's pass 1) + * Determine the size of all bytecodes. + * Check "critical" expressions (must be computable on the first pass, + * i.e. depend only on symbols before it). + * Differences from NASM: + * - right-hand side of EQU is /not/ a critical expr (as the entire file + * has already been parsed, we know all their values at this point). + * - not strictly top->bottom scanning; we scan through a section and + * hop to other sections as necessary. + */ + sections_traverse(sections, NULL, basic_optimize_section); + + /* NASM's pass 2 is output, so we just return. */ + return sections; +} + /* Define optimizer structure -- see optimizer.h for details */ optimizer basic_optimizer = { "Only the most basic optimizations", - "basic" + "basic", + basic_optimize }; diff --git a/src/arch.h b/src/arch.h index 9dd71fb8..ece57e1b 100644 --- a/src/arch.h +++ b/src/arch.h @@ -38,7 +38,11 @@ struct arch { void (*bc_delete) (bytecode *bc); void (*bc_print) (FILE *f, const bytecode *bc); - void (*bc_parser_finalize) (bytecode *bc); + + /* See bytecode.h comments on bc_calc_len() */ + int (*bc_calc_len) (bytecode *bc, /*@only@*/ /*@null@*/ + intnum *(*resolve_label) (section *sect, /*@null@*/ + bytecode *bc)); } bc; }; diff --git a/src/arch/x86/arch.c b/src/arch/x86/arch.c index fc2ebd9c..06da214d 100644 --- a/src/arch/x86/arch.c +++ b/src/arch/x86/arch.c @@ -38,6 +38,6 @@ arch x86_arch = { X86_BYTECODE_TYPE_MAX, x86_bc_delete, x86_bc_print, - x86_bc_parser_finalize + x86_bc_calc_len } }; diff --git a/src/arch/x86/bytecode.c b/src/arch/x86/bytecode.c index 78edcb4a..a4237713 100644 --- a/src/arch/x86/bytecode.c +++ b/src/arch/x86/bytecode.c @@ -460,8 +460,10 @@ x86_bc_print(FILE *f, const bytecode *bc) } } -static void -x86_bc_parser_finalize_insn(x86_insn *insn) +static int +x86_bc_calc_len_insn(x86_insn *insn, /*@only@*/ /*@null@*/ + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)) { effaddr *ea = insn->ea; x86_effaddr_data *ead = ea_get_data(ea); @@ -482,7 +484,7 @@ x86_bc_parser_finalize_insn(x86_insn *insn) ea->nosplit, &ea->len, &ead->modrm, &ead->valid_modrm, &ead->need_modrm, &ead->sib, &ead->valid_sib, &ead->need_sib)) - return; /* failed, don't bother checking rest of insn */ + return 0; /* failed, don't bother checking rest of insn */ } } @@ -513,21 +515,23 @@ x86_bc_parser_finalize_insn(x86_insn *insn) } } - + return 0; } -void -x86_bc_parser_finalize(bytecode *bc) +int +x86_bc_calc_len(bytecode *bc, + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)) { x86_insn *insn; switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_data(bc); - x86_bc_parser_finalize_insn(insn); - break; + return x86_bc_calc_len_insn(insn, resolve_label); default: break; } + return 0; } diff --git a/src/arch/x86/x86-int.h b/src/arch/x86/x86-int.h index 50d3bcce..a75c9995 100644 --- a/src/arch/x86/x86-int.h +++ b/src/arch/x86/x86-int.h @@ -96,7 +96,9 @@ typedef struct x86_jmprel { void x86_bc_delete(bytecode *bc); void x86_bc_print(FILE *f, const bytecode *bc); -void x86_bc_parser_finalize(bytecode *bc); +int x86_bc_calc_len(bytecode *bc, /*@only@*/ /*@null@*/ + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)); int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, unsigned char nosplit, unsigned char *displen, diff --git a/src/arch/x86/x86arch.c b/src/arch/x86/x86arch.c index fc2ebd9c..06da214d 100644 --- a/src/arch/x86/x86arch.c +++ b/src/arch/x86/x86arch.c @@ -38,6 +38,6 @@ arch x86_arch = { X86_BYTECODE_TYPE_MAX, x86_bc_delete, x86_bc_print, - x86_bc_parser_finalize + x86_bc_calc_len } }; diff --git a/src/arch/x86/x86bc.c b/src/arch/x86/x86bc.c index 78edcb4a..a4237713 100644 --- a/src/arch/x86/x86bc.c +++ b/src/arch/x86/x86bc.c @@ -460,8 +460,10 @@ x86_bc_print(FILE *f, const bytecode *bc) } } -static void -x86_bc_parser_finalize_insn(x86_insn *insn) +static int +x86_bc_calc_len_insn(x86_insn *insn, /*@only@*/ /*@null@*/ + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)) { effaddr *ea = insn->ea; x86_effaddr_data *ead = ea_get_data(ea); @@ -482,7 +484,7 @@ x86_bc_parser_finalize_insn(x86_insn *insn) ea->nosplit, &ea->len, &ead->modrm, &ead->valid_modrm, &ead->need_modrm, &ead->sib, &ead->valid_sib, &ead->need_sib)) - return; /* failed, don't bother checking rest of insn */ + return 0; /* failed, don't bother checking rest of insn */ } } @@ -513,21 +515,23 @@ x86_bc_parser_finalize_insn(x86_insn *insn) } } - + return 0; } -void -x86_bc_parser_finalize(bytecode *bc) +int +x86_bc_calc_len(bytecode *bc, + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)) { x86_insn *insn; switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_data(bc); - x86_bc_parser_finalize_insn(insn); - break; + return x86_bc_calc_len_insn(insn, resolve_label); default: break; } + return 0; } diff --git a/src/bytecode.c b/src/bytecode.c index d5da45df..149f71d4 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -240,13 +240,6 @@ bc_delete(bytecode *bc) xfree(bc); } -int -bc_get_offset(/*@unused@*/ section *sect, /*@unused@*/ bytecode *bc, - /*@unused@*/ unsigned long *ret_val) -{ - return 0; /* TODO */ -} - void bc_print(FILE *f, const bytecode *bc) { @@ -312,20 +305,26 @@ bc_print(FILE *f, const bytecode *bc) fprintf(f, "%*sOffset=%lx\n", indent_level, "", bc->offset); } -void -bc_parser_finalize(bytecode *bc) +int +bc_calc_len(bytecode *bc, + intnum *(*resolve_label) (section *sect, /*@null@*/ bytecode *bc)) { switch (bc->type) { case BC_EMPTY: - /* FIXME: delete it (probably in bytecodes_ level, not here */ - InternalError(_("got empty bytecode in parser_finalize")); + InternalError(_("got empty bytecode in bc_calc_len")); + case BC_DATA: + break; + case BC_RESERVE: + break; + case BC_INCBIN: + break; default: if (bc->type < cur_arch->bc.type_max) - cur_arch->bc.bc_parser_finalize(bc); + return cur_arch->bc.bc_calc_len(bc, resolve_label); else InternalError(_("Unknown bytecode type")); - break; } + return 0; } void @@ -369,13 +368,16 @@ bcs_print(FILE *f, const bytecodehead *headp) } } -void -bcs_parser_finalize(bytecodehead *headp) +int +bcs_traverse(bytecodehead *headp, void *d, + int (*func) (bytecode *bc, /*@null@*/ void *d)) { bytecode *cur; STAILQ_FOREACH(cur, headp, link) - bc_parser_finalize(cur); + if (func(cur, d) == 0) + return 0; + return 1; } dataval * diff --git a/src/bytecode.h b/src/bytecode.h index 4b223cbb..e8607c12 100644 --- a/src/bytecode.h +++ b/src/bytecode.h @@ -56,18 +56,19 @@ void bc_set_multiple(bytecode *bc, /*@keep@*/ expr *e); void bc_delete(/*@only@*/ /*@null@*/ bytecode *bc); -/* Gets the offset of the bytecode specified by bc if possible. - * Return value is IF POSSIBLE, not the value. - */ -int bc_get_offset(section *sect, bytecode *bc, - /*@out@*/ unsigned long *ret_val); - void bc_print(FILE *f, const bytecode *bc); -void bc_parser_finalize(bytecode *bc); +/* Calculates length of bytecode, saving in bc structure. + * Returns whether the length is the minimum possible (1=yes, 0=no). + * resolve_label is the function used to determine the value (offset) of a + * in-file label (eg, not an EXTERN variable, which is indeterminate). + */ +int bc_calc_len(bytecode *bc, /*@only@*/ /*@null@*/ + intnum *(*resolve_label) (section *sect, + /*@null@*/ bytecode *bc)); -/* void bytecodes_initialize(bytecodehead *headp); */ -#define bytecodes_initialize(headp) STAILQ_INIT(headp) +/* void bcs_initialize(bytecodehead *headp); */ +#define bcs_initialize(headp) STAILQ_INIT(headp) void bcs_delete(bytecodehead *headp); @@ -83,7 +84,8 @@ void bcs_delete(bytecodehead *headp); void bcs_print(FILE *f, const bytecodehead *headp); -void bcs_parser_finalize(bytecodehead *headp); +int bcs_traverse(bytecodehead *headp, /*@null@*/ void *d, + int (*func) (bytecode *bc, /*@null@*/ void *d)); dataval *dv_new_expr(/*@keep@*/ expr *expn); dataval *dv_new_float(/*@keep@*/ floatnum *flt); diff --git a/src/main.c b/src/main.c index e78e1dfd..e8c24538 100644 --- a/src/main.c +++ b/src/main.c @@ -36,6 +36,7 @@ #include "objfmt.h" #include "preproc.h" #include "parser.h" +#include "optimizer.h" #include "arch.h" @@ -189,9 +190,9 @@ main(int argc, char *argv[]) indent_level--; symrec_parser_finalize(); - sections_parser_finalize(sections); + basic_optimizer.optimize(sections); - fprintf(obj, "\nSections after post-parser-finalization:\n"); + fprintf(obj, "\nSections after optimization:\n"); indent_level++; sections_print(obj, sections); indent_level--; diff --git a/src/optimizer.h b/src/optimizer.h index cb4aae10..23a465e2 100644 --- a/src/optimizer.h +++ b/src/optimizer.h @@ -35,6 +35,7 @@ struct optimizer { * This function takes the unoptimized linked list of sections and returns * an optimized linked list of sections ready for output to an object file. */ + sectionhead *(*optimize) (sectionhead *sections); }; /* Available optimizers */ diff --git a/src/optimizers/basic/basic-optimizer.c b/src/optimizers/basic/basic-optimizer.c index d22e6af4..8a14b1f4 100644 --- a/src/optimizers/basic/basic-optimizer.c +++ b/src/optimizers/basic/basic-optimizer.c @@ -22,11 +22,78 @@ #include "util.h" RCSID("$IdPath$"); +#include "bytecode.h" +#include "section.h" + +#include "bc-int.h" + #include "optimizer.h" +#define SECTFLAG_NONE 0 +#define SECTFLAG_INPROGRESS (1<<0) +#define SECTFLAG_DONE (1<<1) + +#define BCFLAG_NONE 0 +#define BCFLAG_INPROGRESS (1<<0) +#define BCFLAG_DONE (1<<1) + +static /*@only@*/ /*@null@*/ intnum * +basic_optimize_resolve_label(section *sect, bytecode *bc) +{ + unsigned long flags; + + flags = section_get_opt_flags(sect); + + return NULL; +} + +static int +basic_optimize_bytecode(bytecode *bc, /*@unused@*/ /*@null@*/ void *d) +{ + bc->opt_flags = BCFLAG_INPROGRESS; + + bc_calc_len(bc, basic_optimize_resolve_label); + + bc->opt_flags = BCFLAG_DONE; + + return 1; +} + +static int +basic_optimize_section(section *sect, /*@unused@*/ /*@null@*/ void *d) +{ + section_set_opt_flags(sect, SECTFLAG_INPROGRESS); + + bcs_traverse(section_get_bytecodes(sect), NULL, basic_optimize_bytecode); + + section_set_opt_flags(sect, SECTFLAG_DONE); + + return 1; +} + +static sectionhead * +basic_optimize(sectionhead *sections) +{ + /* Optimization process: (essentially NASM's pass 1) + * Determine the size of all bytecodes. + * Check "critical" expressions (must be computable on the first pass, + * i.e. depend only on symbols before it). + * Differences from NASM: + * - right-hand side of EQU is /not/ a critical expr (as the entire file + * has already been parsed, we know all their values at this point). + * - not strictly top->bottom scanning; we scan through a section and + * hop to other sections as necessary. + */ + sections_traverse(sections, NULL, basic_optimize_section); + + /* NASM's pass 2 is output, so we just return. */ + return sections; +} + /* Define optimizer structure -- see optimizer.h for details */ optimizer basic_optimizer = { "Only the most basic optimizations", - "basic" + "basic", + basic_optimize }; diff --git a/src/optimizers/basic/optimizer.c b/src/optimizers/basic/optimizer.c index d22e6af4..8a14b1f4 100644 --- a/src/optimizers/basic/optimizer.c +++ b/src/optimizers/basic/optimizer.c @@ -22,11 +22,78 @@ #include "util.h" RCSID("$IdPath$"); +#include "bytecode.h" +#include "section.h" + +#include "bc-int.h" + #include "optimizer.h" +#define SECTFLAG_NONE 0 +#define SECTFLAG_INPROGRESS (1<<0) +#define SECTFLAG_DONE (1<<1) + +#define BCFLAG_NONE 0 +#define BCFLAG_INPROGRESS (1<<0) +#define BCFLAG_DONE (1<<1) + +static /*@only@*/ /*@null@*/ intnum * +basic_optimize_resolve_label(section *sect, bytecode *bc) +{ + unsigned long flags; + + flags = section_get_opt_flags(sect); + + return NULL; +} + +static int +basic_optimize_bytecode(bytecode *bc, /*@unused@*/ /*@null@*/ void *d) +{ + bc->opt_flags = BCFLAG_INPROGRESS; + + bc_calc_len(bc, basic_optimize_resolve_label); + + bc->opt_flags = BCFLAG_DONE; + + return 1; +} + +static int +basic_optimize_section(section *sect, /*@unused@*/ /*@null@*/ void *d) +{ + section_set_opt_flags(sect, SECTFLAG_INPROGRESS); + + bcs_traverse(section_get_bytecodes(sect), NULL, basic_optimize_bytecode); + + section_set_opt_flags(sect, SECTFLAG_DONE); + + return 1; +} + +static sectionhead * +basic_optimize(sectionhead *sections) +{ + /* Optimization process: (essentially NASM's pass 1) + * Determine the size of all bytecodes. + * Check "critical" expressions (must be computable on the first pass, + * i.e. depend only on symbols before it). + * Differences from NASM: + * - right-hand side of EQU is /not/ a critical expr (as the entire file + * has already been parsed, we know all their values at this point). + * - not strictly top->bottom scanning; we scan through a section and + * hop to other sections as necessary. + */ + sections_traverse(sections, NULL, basic_optimize_section); + + /* NASM's pass 2 is output, so we just return. */ + return sections; +} + /* Define optimizer structure -- see optimizer.h for details */ optimizer basic_optimizer = { "Only the most basic optimizations", - "basic" + "basic", + basic_optimize }; diff --git a/src/section.c b/src/section.c index b791f9d9..f0c2f0d6 100644 --- a/src/section.c +++ b/src/section.c @@ -112,7 +112,7 @@ sections_switch_general(sectionhead *headp, const char *name, void *of_data, s->type = SECTION_GENERAL; s->data.general.name = xstrdup(name); s->data.general.of_data = of_data; - bytecodes_initialize(&s->bc); + bcs_initialize(&s->bc); s->opt_flags = 0; s->res_only = res_only; @@ -133,7 +133,7 @@ sections_switch_absolute(sectionhead *headp, expr *start) s->type = SECTION_ABSOLUTE; s->data.start = start; - bytecodes_initialize(&s->bc); + bcs_initialize(&s->bc); s->opt_flags = 0; s->res_only = 1; @@ -187,13 +187,16 @@ sections_print(FILE *f, const sectionhead *headp) } } -void -sections_parser_finalize(sectionhead *headp) +int +sections_traverse(sectionhead *headp, /*@null@*/ void *d, + int (*func) (section *sect, /*@null@*/ void *d)) { section *cur; STAILQ_FOREACH(cur, headp, link) - bcs_parser_finalize(&cur->bc); + if (func(cur, d) == 0) + return 0; + return 1; } bytecodehead * diff --git a/src/section.h b/src/section.h index 011d1c75..153490ec 100644 --- a/src/section.h +++ b/src/section.h @@ -45,7 +45,8 @@ void sections_delete(sectionhead *headp); void sections_print(FILE *f, const sectionhead *headp); -void sections_parser_finalize(sectionhead *headp); +int sections_traverse(sectionhead *headp, /*@null@*/ void *d, + int (*func) (section *sect, /*@null@*/ void *d)); /*@dependent@*/ bytecodehead *section_get_bytecodes(section *sect);