From dd915a3eefbb80431ce389d7266bd1719c11d3f0 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 4 Nov 2002 04:47:41 +0000 Subject: [PATCH] Initial revision svn path=/trunk/yasm/; revision=794 --- modules/preprocs/nasm/macros.pl | 48 + modules/preprocs/nasm/nasm-eval.c | 825 +++++ modules/preprocs/nasm/nasm-eval.h | 28 + modules/preprocs/nasm/nasm-pp.c | 4459 ++++++++++++++++++++++++++++ modules/preprocs/nasm/nasm-pp.h | 20 + modules/preprocs/nasm/nasm.h | 850 ++++++ modules/preprocs/nasm/nasmlib.c | 1116 +++++++ modules/preprocs/nasm/nasmlib.h | 258 ++ modules/preprocs/nasm/standard.mac | 110 + src/preprocs/nasm/macros.pl | 48 + src/preprocs/nasm/nasm-eval.c | 825 +++++ src/preprocs/nasm/nasm-eval.h | 28 + src/preprocs/nasm/nasm-pp.c | 4459 ++++++++++++++++++++++++++++ src/preprocs/nasm/nasm-pp.h | 20 + src/preprocs/nasm/nasm.h | 850 ++++++ src/preprocs/nasm/nasmlib.c | 1116 +++++++ src/preprocs/nasm/nasmlib.h | 258 ++ src/preprocs/nasm/standard.mac | 110 + 18 files changed, 15428 insertions(+) create mode 100644 modules/preprocs/nasm/macros.pl create mode 100644 modules/preprocs/nasm/nasm-eval.c create mode 100644 modules/preprocs/nasm/nasm-eval.h create mode 100644 modules/preprocs/nasm/nasm-pp.c create mode 100644 modules/preprocs/nasm/nasm-pp.h create mode 100644 modules/preprocs/nasm/nasm.h create mode 100644 modules/preprocs/nasm/nasmlib.c create mode 100644 modules/preprocs/nasm/nasmlib.h create mode 100644 modules/preprocs/nasm/standard.mac create mode 100644 src/preprocs/nasm/macros.pl create mode 100644 src/preprocs/nasm/nasm-eval.c create mode 100644 src/preprocs/nasm/nasm-eval.h create mode 100644 src/preprocs/nasm/nasm-pp.c create mode 100644 src/preprocs/nasm/nasm-pp.h create mode 100644 src/preprocs/nasm/nasm.h create mode 100644 src/preprocs/nasm/nasmlib.c create mode 100644 src/preprocs/nasm/nasmlib.h create mode 100644 src/preprocs/nasm/standard.mac diff --git a/modules/preprocs/nasm/macros.pl b/modules/preprocs/nasm/macros.pl new file mode 
100644 index 00000000..0934d174 --- /dev/null +++ b/modules/preprocs/nasm/macros.pl @@ -0,0 +1,48 @@ +#!/usr/bin/perl -w +# +# macros.pl produce macros.c from standard.mac +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. +# +# Every file named on the command line is read in turn. Each line has +# surrounding whitespace and any trailing `;' comment removed and, if +# anything is left, is written as one C string of stdmac[]. A line +# reading *END*TASM*MACROS* records the number of strings written so far +# as TASM_MACRO_COUNT; without such a line the count is the total. + +use strict; + +my $fname; +my $line = 0; +my $index = 0; +my $tasm_count; + +undef $tasm_count; + +open(OUTPUT,">macros.c") or die "unable to open macros.c\n"; + +# <stddef.h> supplies the NULL that terminates the generated array. +print OUTPUT "/* This file auto-generated from standard.mac by macros.pl" . +" - don't edit it */\n\n#include <stddef.h>\n\nstatic const char *stdmac[] = {\n"; + +foreach $fname ( @ARGV ) { + open(INPUT,$fname) or die "unable to open $fname\n"; + # read the file just opened, one line per iteration, into $_ + while (<INPUT>) { + $line++; + chomp; + if (m/^\s*\*END\*TASM\*MACROS\*\s*$/) { + $tasm_count = $index; + } elsif (m/^\s*((\s*([^\"\';\s]+|\"[^\"]*\"|\'[^\']*\'))*)\s*(;.*)?$/) { + $_ = $1; + s/\\/\\\\/g; + s/"/\\"/g; + if (length > 0) { + print OUTPUT " \"$_\",\n"; + $index++; + } + } else { + die "$fname:$line: error unterminated quote"; + } + } + close(INPUT); +} +print OUTPUT " NULL\n};\n"; +$tasm_count = $index unless ( defined($tasm_count) ); +print OUTPUT "#define TASM_MACRO_COUNT $tasm_count\n"; +close(OUTPUT); diff --git a/modules/preprocs/nasm/nasm-eval.c b/modules/preprocs/nasm/nasm-eval.c new file mode 100644 index 00000000..28aca642 --- /dev/null +++ b/modules/preprocs/nasm/nasm-eval.c @@ -0,0 +1,825 @@ +/* eval.c expression evaluator for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ * + * initial version 27/iii/95 by Simon Tatham + */ + +#include +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" +#include "eval.h" +#include "labels.h" + +#define TEMPEXPRS_DELTA 128 +#define TEMPEXPR_DELTA 8 + +static scanner scan; /* Address of scanner routine */ +static efunc error; /* Address of error reporting routine */ +static lfunc labelfunc; /* Address of label routine */ + +static struct ofmt *outfmt; /* Structure of addresses of output routines */ + +static expr **tempexprs = NULL; +static int ntempexprs; +static int tempexprs_size = 0; + +static expr *tempexpr; +static int ntempexpr; +static int tempexpr_size; + +static struct tokenval *tokval; /* The current token */ +static int i; /* The t_type of tokval */ + +static void *scpriv; +static loc_t *location; /* Pointer to current line's segment,offset */ +static int *opflags; + +static struct eval_hints *hint; + +extern int in_abs_seg; /* ABSOLUTE segment flag */ +extern long abs_seg; /* ABSOLUTE segment */ +extern long abs_offset; /* ABSOLUTE segment offset */ + +/* + * Unimportant cleanup is done to avoid confusing people who are trying + * to debug real memory leaks + */ +void eval_cleanup(void) +{ + while (ntempexprs) + nasm_free (tempexprs[--ntempexprs]); + nasm_free (tempexprs); +} + +/* + * Construct a temporary expression. 
+ */ +static void begintemp(void) /* start a new, empty temporary expression */ +{ + tempexpr = NULL; + tempexpr_size = ntempexpr = 0; +} + +static void addtotemp(long type, long value) /* append one (type, value) term to the temporary */ +{ + while (ntempexpr >= tempexpr_size) { /* full: grow by TEMPEXPR_DELTA terms */ + tempexpr_size += TEMPEXPR_DELTA; + tempexpr = nasm_realloc(tempexpr, + tempexpr_size*sizeof(*tempexpr)); + } + tempexpr[ntempexpr].type = type; + tempexpr[ntempexpr++].value = value; +} + +static expr *finishtemp(void) /* terminate the temporary, record it in tempexprs[] and return it */ +{ + addtotemp (0L, 0L); /* terminate */ + while (ntempexprs >= tempexprs_size) { /* full: grow by TEMPEXPRS_DELTA slots */ + tempexprs_size += TEMPEXPRS_DELTA; + tempexprs = nasm_realloc(tempexprs, + tempexprs_size*sizeof(*tempexprs)); + } + return tempexprs[ntempexprs++] = tempexpr; +} + +/* + * Add two vector datatypes. We have some bizarre behaviour on far- + * absolute segment types: we preserve them during addition _only_ + * if one of the segments is a truly pure scalar. + * + * Neither operand is modified; the sum is built as a new temporary. + */ +static expr *add_vectors(expr *p, expr *q) +{ + int preserve; + + preserve = is_really_simple(p) || is_really_simple(q); + + begintemp(); + + while (p->type && q->type && + p->type < EXPR_SEGBASE+SEG_ABS && + q->type < EXPR_SEGBASE+SEG_ABS) + { /* merge step: the term with the lower type is emitted first */ + int lasttype; + + if (p->type > q->type) { + addtotemp(q->type, q->value); + lasttype = q++->type; + } else if (p->type < q->type) { + addtotemp(p->type, p->value); + lasttype = p++->type; + } else { /* *p and *q have same type */ + long sum = p->value + q->value; + if (sum) /* terms that cancel to zero are left out */ + addtotemp(p->type, sum); + lasttype = p->type; + p++, q++; + } + if (lasttype == EXPR_UNKNOWN) { /* stop merging once an unknown term has been handled */ + return finishtemp(); + } + } + while (p->type && + (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) + { /* copy what is left of p */ + addtotemp(p->type, p->value); + p++; + } + while (q->type && + (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) + { /* copy what is left of q */ + addtotemp(q->type, q->value); + q++; + } + + return finishtemp(); +} + +/* + * Multiply a vector by a scalar. Strip far-absolute segment part + * if present. + * + * Explicit treatment of UNKNOWN is not required in this routine, + * since it will silently do the Right Thing anyway. 
+ * + * If `affect_hints' is set, we also change the hint type to + * NOTBASE if a MAKEBASE hint points at a register being + * multiplied. This allows [eax*1+ebx] to hint EBX rather than EAX + * as the base register. + */ +static expr *scalar_mult(expr *vect, long scalar, int affect_hints) /* NB: scales `vect' in place and returns it */ +{ + expr *p = vect; + + while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) { + p->value = scalar * (p->value); + if (hint && hint->type == EAH_MAKEBASE && + p->type == hint->base && affect_hints) + hint->type = EAH_NOTBASE; + p++; + } + p->type = 0; /* terminate here, cutting off any remaining (far-absolute) terms */ + + return vect; +} + +static expr *scalarvect (long scalar) /* new temporary holding the single term (EXPR_SIMPLE, scalar) */ +{ + begintemp(); + addtotemp(EXPR_SIMPLE, scalar); + return finishtemp(); +} + +static expr *unknown_expr (void) /* new temporary holding the single term (EXPR_UNKNOWN, 1) */ +{ + begintemp(); + addtotemp(EXPR_UNKNOWN, 1L); + return finishtemp(); +} + +/* + * The SEG operator: calculate the segment part of a relocatable + * value. Return NULL, as usual, if an error occurs. Report the + * error too. + * + * An unknown operand yields an unknown result rather than an error. + */ +static expr *segment_part (expr *e) +{ + long seg; + + if (is_unknown(e)) + return unknown_expr(); + + if (!is_reloc(e)) { + error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); + return NULL; + } + + seg = reloc_seg(e); + if (seg == NO_SEG) { + error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); + return NULL; + } else if (seg & SEG_ABS) { /* result is the segment value with the SEG_ABS bits cleared */ + return scalarvect(seg & ~SEG_ABS); + } else if (seg & 1) { + error(ERR_NONFATAL, "SEG applied to something which" + " is already a segment base"); + return NULL; + } + else { + long base = outfmt->segbase(seg+1); /* ask the output format; NO_SEG makes the result unknown */ + + begintemp(); + addtotemp((base == NO_SEG ? EXPR_UNKNOWN : EXPR_SEGBASE+base), 1L); + return finishtemp(); + } +} + +/* + * Recursive-descent parser. Called with a single boolean operand, + * which is TRUE if the evaluation is critical (i.e. unresolved + * symbols are an error condition). Must update the global `i' to + * reflect the token after the parsed string. May return NULL. 
+ * + * evaluate() should report its own errors: on return it is assumed + * that if NULL has been returned, the error has already been + * reported. + */ + +/* + * Grammar parsed is: + * + * expr : bexpr [ WRT expr6 ] + * bexpr : rexp0 or expr0 depending on relative-mode setting + * rexp0 : rexp1 [ {||} rexp1...] + * rexp1 : rexp2 [ {^^} rexp2...] + * rexp2 : rexp3 [ {&&} rexp3...] + * rexp3 : expr0 [ {=,==,<>,!=,<,>,<=,>=} expr0 ] + * expr0 : expr1 [ {|} expr1...] + * expr1 : expr2 [ {^} expr2...] + * expr2 : expr3 [ {&} expr3...] + * expr3 : expr4 [ {<<,>>} expr4...] + * expr4 : expr5 [ {+,-} expr5...] + * expr5 : expr6 [ {*,/,%,//,%%} expr6...] + * expr6 : { ~,+,-,SEG } expr6 + * | (bexpr) + * | symbol + * | $ + * | number + */ + +static expr *rexp0(int), *rexp1(int), *rexp2(int), *rexp3(int); + +static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int); +static expr *expr4(int), *expr5(int), *expr6(int); + +static expr *(*bexpr)(int); + +static expr *rexp0(int critical) +{ + expr *e, *f; + + e = rexp1(critical); + if (!e) + return NULL; + + while (i == TOKEN_DBL_OR) + { + i = scan(scpriv, tokval); + f = rexp1(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`|' operator may only be applied to" + " scalar values"); + } + + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect ((long) (reloc_value(e) || reloc_value(f))); + } + return e; +} + +static expr *rexp1(int critical) +{ + expr *e, *f; + + e = rexp2(critical); + if (!e) + return NULL; + + while (i == TOKEN_DBL_XOR) + { + i = scan(scpriv, tokval); + f = rexp2(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`^' operator may only be applied to" + " scalar values"); + } + + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect 
((long) (!reloc_value(e) ^ !reloc_value(f))); + } + return e; +} + +static expr *rexp2(int critical) +{ + expr *e, *f; + + e = rexp3(critical); + if (!e) + return NULL; + while (i == TOKEN_DBL_AND) + { + i = scan(scpriv, tokval); + f = rexp3(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`&' operator may only be applied to" + " scalar values"); + } + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect ((long) (reloc_value(e) && reloc_value(f))); + } + return e; +} + +static expr *rexp3(int critical) +{ + expr *e, *f; + long v; + + e = expr0(critical); + if (!e) + return NULL; + + while (i == TOKEN_EQ || i == TOKEN_LT || i == TOKEN_GT || + i == TOKEN_NE || i == TOKEN_LE || i == TOKEN_GE) + { + int j = i; + i = scan(scpriv, tokval); + f = expr0(critical); + if (!f) + return NULL; + + e = add_vectors (e, scalar_mult(f, -1L, FALSE)); + + switch (j) + { + case TOKEN_EQ: case TOKEN_NE: + if (is_unknown(e)) + v = -1; /* means unknown */ + else if (!is_really_simple(e) || reloc_value(e) != 0) + v = (j == TOKEN_NE); /* unequal, so return TRUE if NE */ + else + v = (j == TOKEN_EQ); /* equal, so return TRUE if EQ */ + break; + default: + if (is_unknown(e)) + v = -1; /* means unknown */ + else if (!is_really_simple(e)) { + error(ERR_NONFATAL, "`%s': operands differ by a non-scalar", + (j == TOKEN_LE ? "<=" : j == TOKEN_LT ? "<" : + j == TOKEN_GE ? 
">=" : ">")); + v = 0; /* must set it to _something_ */ + } else { + int vv = reloc_value(e); + if (vv == 0) + v = (j == TOKEN_LE || j == TOKEN_GE); + else if (vv > 0) + v = (j == TOKEN_GE || j == TOKEN_GT); + else /* vv < 0 */ + v = (j == TOKEN_LE || j == TOKEN_LT); + } + break; + } + + if (v == -1) + e = unknown_expr(); + else + e = scalarvect(v); + } + return e; +} + +static expr *expr0(int critical) +{ + expr *e, *f; + + e = expr1(critical); + if (!e) + return NULL; + + while (i == '|') + { + i = scan(scpriv, tokval); + f = expr1(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`|' operator may only be applied to" + " scalar values"); + } + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (reloc_value(e) | reloc_value(f)); + } + return e; +} + +static expr *expr1(int critical) +{ + expr *e, *f; + + e = expr2(critical); + if (!e) + return NULL; + + while (i == '^') { + i = scan(scpriv, tokval); + f = expr2(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`^' operator may only be applied to" + " scalar values"); + } + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (reloc_value(e) ^ reloc_value(f)); + } + return e; +} + +static expr *expr2(int critical) +{ + expr *e, *f; + + e = expr3(critical); + if (!e) + return NULL; + + while (i == '&') { + i = scan(scpriv, tokval); + f = expr3(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`&' operator may only be applied to" + " scalar values"); + } + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (reloc_value(e) & reloc_value(f)); + } + return e; +} + +static expr *expr3(int critical) +{ + 
expr *e, *f; + + e = expr4(critical); + if (!e) + return NULL; + + while (i == TOKEN_SHL || i == TOKEN_SHR) + { + int j = i; + i = scan(scpriv, tokval); + f = expr4(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "shift operator may only be applied to" + " scalar values"); /* e is left unchanged in this case */ + } else if (is_just_unknown(e) || is_just_unknown(f)) { + e = unknown_expr(); + } else switch (j) { /* NOTE(review): the shift count is not range-checked here */ + case TOKEN_SHL: + e = scalarvect (reloc_value(e) << reloc_value(f)); + break; + case TOKEN_SHR: /* shifts the value as unsigned long */ + e = scalarvect (((unsigned long)reloc_value(e)) >> + reloc_value(f)); + break; + } + } + return e; +} + +static expr *expr4(int critical) /* additive: expr5 { `+' | `-' expr5 } */ +{ + expr *e, *f; + + e = expr5(critical); + if (!e) + return NULL; + while (i == '+' || i == '-') + { + int j = i; + i = scan(scpriv, tokval); + f = expr5(critical); + if (!f) + return NULL; + switch (j) { + case '+': + e = add_vectors (e, f); + break; + case '-': /* e + (-f); scalar_mult negates f in place */ + e = add_vectors (e, scalar_mult(f, -1L, FALSE)); + break; + } + } + return e; +} + +static expr *expr5(int critical) /* multiplicative: expr6 { `*' | `/' | `%' | TOKEN_SDIV | TOKEN_SMOD expr6 } */ +{ + expr *e, *f; + + e = expr6(critical); + if (!e) + return NULL; + while (i == '*' || i == '/' || i == '%' || + i == TOKEN_SDIV || i == TOKEN_SMOD) + { + int j = i; + i = scan(scpriv, tokval); + f = expr6(critical); + if (!f) + return NULL; + if (j != '*' && (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f)))) + { + error(ERR_NONFATAL, "division operator may only be applied to" + " scalar values"); + return NULL; + } + if (j != '*' && !is_unknown(f) && reloc_value(f) == 0) { + error(ERR_NONFATAL, "division by zero"); + return NULL; + } + switch (j) { + case '*': /* at least one side must be a plain scalar; the other side is scaled in place */ + if (is_simple(e)) + e = scalar_mult (f, reloc_value(e), TRUE); + else if (is_simple(f)) + e = scalar_mult (e, reloc_value(f), TRUE); + else if (is_just_unknown(e) && is_just_unknown(f)) + e = unknown_expr(); + else { + error(ERR_NONFATAL, "unable to multiply two " + "non-scalar objects"); + 
return NULL; + } + break; + case '/': /* operands are cast to unsigned long */ + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (((unsigned long)reloc_value(e)) / + ((unsigned long)reloc_value(f))); + break; + case '%': /* operands are cast to unsigned long */ + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (((unsigned long)reloc_value(e)) % + ((unsigned long)reloc_value(f))); + break; + case TOKEN_SDIV: /* operands are cast to signed long */ + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (((signed long)reloc_value(e)) / + ((signed long)reloc_value(f))); + break; + case TOKEN_SMOD: /* operands are cast to signed long */ + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (((signed long)reloc_value(e)) % + ((signed long)reloc_value(f))); + break; + } + } + return e; +} + +static expr *expr6(int critical) /* unary operators, parenthesised sub-expressions and primaries */ +{ + long type; + expr *e; + long label_seg, label_ofs; + + if (i == '-') { /* unary minus: negate the operand in place */ + i = scan(scpriv, tokval); + e = expr6(critical); + if (!e) + return NULL; + return scalar_mult (e, -1L, FALSE); + } else if (i == '+') { /* unary plus: no effect */ + i = scan(scpriv, tokval); + return expr6(critical); + } else if (i == '~') { /* bitwise complement of a scalar */ + i = scan(scpriv, tokval); + e = expr6(critical); + if (!e) + return NULL; + if (is_just_unknown(e)) + return unknown_expr(); + else if (!is_simple(e)) { + error(ERR_NONFATAL, "`~' operator may only be applied to" + " scalar values"); + return NULL; + } + return scalarvect(~reloc_value(e)); + } else if (i == TOKEN_SEG) { /* SEG operator, see segment_part() */ + i = scan(scpriv, tokval); + e = expr6(critical); + if (!e) + return NULL; + e = segment_part(e); + if (!e) + return NULL; + if (is_unknown(e) && critical) { + error(ERR_NONFATAL, "unable to determine segment base"); + return NULL; + } + return e; + } else if (i == '(') { /* parenthesised sub-expression, parsed with the current top-level rule */ + i = scan(scpriv, tokval); + e = bexpr(critical); + if (!e) + return NULL; + if (i != ')') { + error(ERR_NONFATAL, "expecting `)'"); + return NULL; + } + i = scan(scpriv, tokval); + return e; + } + else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID || + i == 
TOKEN_HERE || i == TOKEN_BASE) + { + begintemp(); + switch (i) { + case TOKEN_NUM: + addtotemp(EXPR_SIMPLE, tokval->t_integer); + break; + case TOKEN_REG: /* the register number is used as the term type */ + addtotemp(tokval->t_integer, 1L); + if (hint && hint->type == EAH_NOHINT) /* only while no hint has been recorded yet */ + hint->base = tokval->t_integer, hint->type = EAH_MAKEBASE; + break; + case TOKEN_ID: + case TOKEN_HERE: + case TOKEN_BASE: + /* + * If !location->known, this indicates that no + * symbol, Here or Base references are valid because we + * are in preprocess-only mode. + */ + if (!location->known) { + error(ERR_NONFATAL, + "%s not supported in preprocess-only mode", + (i == TOKEN_ID ? "symbol references" : + i == TOKEN_HERE ? "`$'" : "`$$'")); + addtotemp(EXPR_UNKNOWN, 1L); + break; + } + + type = EXPR_SIMPLE; /* might get overridden by UNKNOWN */ + if (i == TOKEN_BASE) + { + label_seg = in_abs_seg ? abs_seg : location->segment; + label_ofs = 0; + } else if (i == TOKEN_HERE) { + label_seg = in_abs_seg ? abs_seg : location->segment; + label_ofs = in_abs_seg ? abs_offset : location->offset; + } else { + if (!labelfunc(tokval->t_charptr,&label_seg,&label_ofs)) + { /* lookup failed: how to react depends on `critical' */ + if (critical == 2) { + error (ERR_NONFATAL, "symbol `%s' undefined", + tokval->t_charptr); + return NULL; + } else if (critical == 1) { + error (ERR_NONFATAL, + "symbol `%s' not defined before use", + tokval->t_charptr); + return NULL; + } else { /* not critical: flag it for the caller and carry on with an unknown value */ + if (opflags) + *opflags |= 1; + type = EXPR_UNKNOWN; + label_seg = NO_SEG; + label_ofs = 1; + } + } + if (opflags && is_extern (tokval->t_charptr)) + *opflags |= OPFLAG_EXTERN; + } + addtotemp(type, label_ofs); + if (label_seg!=NO_SEG) + addtotemp(EXPR_SEGBASE + label_seg, 1L); + break; + } + i = scan(scpriv, tokval); + return finishtemp(); + } else { + error(ERR_NONFATAL, "expression syntax error"); + return NULL; + } +} + +void eval_global_info (struct ofmt *output, lfunc lookup_label, loc_t *locp) /* remember the output format, label lookup routine and location record for later evaluate() calls */ +{ + outfmt = output; + labelfunc = lookup_label; + location = locp; +} + +expr *evaluate (scanner sc, void *scprivate, struct tokenval 
*tv, + int *fwref, int critical, efunc report_error, + struct eval_hints *hints) +{ + expr *e; + expr *f = NULL; /* right-hand operand of WRT, if one was given */ + + hint = hints; + if (hint) + hint->type = EAH_NOHINT; + + if (critical & CRITICAL) { /* CRITICAL flag set: start from rexp0, and clear the flag before use */ + critical &= ~CRITICAL; + bexpr = rexp0; + } else + bexpr = expr0; + + scan = sc; + scpriv = scprivate; + tokval = tv; + error = report_error; + opflags = fwref; + + if (tokval->t_type == TOKEN_INVALID) /* no token supplied: fetch the first one */ + i = scan(scpriv, tokval); + else + i = tokval->t_type; + + /* Every expression built by the previous call is freed here, so a result must not be kept across calls. */ + while (ntempexprs) /* initialise temporary storage */ + nasm_free (tempexprs[--ntempexprs]); + + e = bexpr (critical); + if (!e) + return NULL; + + if (i == TOKEN_WRT) { + i = scan(scpriv, tokval); /* eat the WRT */ + f = expr6 (critical); + if (!f) + return NULL; + } + e = scalar_mult (e, 1L, FALSE); /* strip far-absolute segment part */ + if (f) { + expr *g; /* one-term vector carrying the EXPR_WRT term */ + if (is_just_unknown(f)) + g = unknown_expr(); + else { + long value; + begintemp(); + if (!is_reloc(f)) { + error(ERR_NONFATAL, "invalid right-hand operand to WRT"); + return NULL; + } + value = reloc_seg(f); + if (value == NO_SEG) /* plain number: use it with SEG_ABS set */ + value = reloc_value(f) | SEG_ABS; + else if (!(value & SEG_ABS) && !(value % 2) && critical) + { + error(ERR_NONFATAL, "invalid right-hand operand to WRT"); + return NULL; + } + addtotemp(EXPR_WRT, value); + g = finishtemp(); + } + e = add_vectors (e, g); + } + return e; +} diff --git a/modules/preprocs/nasm/nasm-eval.h b/modules/preprocs/nasm/nasm-eval.h new file mode 100644 index 00000000..a933cbfd --- /dev/null +++ b/modules/preprocs/nasm/nasm-eval.h @@ -0,0 +1,28 @@ +/* eval.h header file for eval.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#ifndef NASM_EVAL_H +#define NASM_EVAL_H + +/* + * Called once to tell the evaluator what output format is + * providing segment-base details, and what function can be used to + * look labels up. + * + * NOTE(review): this header includes nothing itself, so the types used + * below presumably come from nasm.h being included first - confirm. + */ +void eval_global_info (struct ofmt *output, lfunc lookup_label, loc_t *locp); + +/* + * The evaluator itself. Returns the resulting expression, or NULL + * once the problem has been reported through `report_error'. + */ +expr *evaluate (scanner sc, void *scprivate, struct tokenval *tv, + int *fwref, int critical, efunc report_error, + struct eval_hints *hints); + +void eval_cleanup(void); /* frees the evaluator's temporary expression storage */ + +#endif diff --git a/modules/preprocs/nasm/nasm-pp.c b/modules/preprocs/nasm/nasm-pp.c new file mode 100644 index 00000000..0770812e --- /dev/null +++ b/modules/preprocs/nasm/nasm-pp.c @@ -0,0 +1,4459 @@ +/* -*- mode: c; c-file-style: "bsd" -*- */ +/* preproc.c macro preprocessor for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ * + * initial version 18/iii/97 by Simon Tatham + */ + +/* Typical flow of text through preproc + * + * pp_getline gets tokenised lines, either + * + * from a macro expansion + * + * or + * { + * read_line gets raw text from stdmacpos, or predef, or current input file + * tokenise converts to tokens + * } + * + * expand_mmac_params is used to expand %1 etc., unless a macro is being + * defined or a false conditional is being processed + * (%0, %1, %+1, %-1, %%foo) + * + * do_directive checks for directives + * + * expand_smacro is used to expand single line macros + * + * expand_mmacro is used to expand multi-line macros + * + * detoken is used to convert the line back to text + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" + +typedef struct SMacro SMacro; +typedef struct MMacro MMacro; +typedef struct Context Context; +typedef struct Token Token; +typedef struct Blocks Blocks; +typedef struct Line Line; +typedef struct Include Include; +typedef struct Cond Cond; +typedef struct IncPath IncPath; + +/* + * Store the definition of a single-line macro. + */ +struct SMacro +{ + SMacro *next; + char *name; + int casesense; + int nparam; + int in_progress; + Token *expansion; +}; + +/* + * Store the definition of a multi-line macro. This is also used to + * store the interiors of `%rep...%endrep' blocks, which are + * effectively self-re-invoking multi-line macros which simply + * don't have a name or bother to appear in the hash tables. %rep + * blocks are signified by having a NULL `name' field. + * + * In a MMacro describing a `%rep' block, the `in_progress' field + * isn't merely boolean, but gives the number of repeats left to + * run. + * + * The `next' field is used for storing MMacros in hash tables; the + * `next_active' field is for stacking them on istk entries. 
+ * + * When a MMacro is being expanded, `params', `iline', `nparam', + * `paramlen', `rotate' and `unique' are local to the invocation. + */ +struct MMacro +{ + MMacro *next; + char *name; + int casesense; + int nparam_min, nparam_max; + int plus; /* is the last parameter greedy? */ + int nolist; /* is this macro listing-inhibited? */ + int in_progress; + Token *dlist; /* All defaults as one list */ + Token **defaults; /* Parameter default pointers */ + int ndefs; /* number of default parameters */ + Line *expansion; + + MMacro *next_active; + MMacro *rep_nest; /* used for nesting %rep */ + Token **params; /* actual parameters */ + Token *iline; /* invocation line */ + int nparam, rotate, *paramlen; + unsigned long unique; + int lineno; /* Current line number on expansion */ +}; + +/* + * The context stack is composed of a linked list of these. + */ +struct Context +{ + Context *next; + SMacro *localmac; + char *name; + unsigned long number; +}; + +/* + * This is the internal form which we break input lines up into. + * Typically stored in linked lists. + * + * Note that `type' serves a double meaning: TOK_SMAC_PARAM is not + * necessarily used as-is, but is intended to denote the number of + * the substituted parameter. So in the definition + * + * %define a(x,y) ( (x) & ~(y) ) + * + * the token representing `x' will have its type changed to + * TOK_SMAC_PARAM, but the one representing `y' will be + * TOK_SMAC_PARAM+1. + * + * TOK_INTERNAL_STRING is a dirty hack: it's a single string token + * which doesn't need quotes around it. Used in the pre-include + * mechanism as an alternative to trying to find a sensible type of + * quote to use on the filename we were passed. 
+ */ +struct Token +{ + Token *next; + char *text; + SMacro *mac; /* associated macro for TOK_SMAC_END */ + int type; +}; +enum +{ + TOK_WHITESPACE = 1, TOK_COMMENT, TOK_ID, TOK_PREPROC_ID, TOK_STRING, + TOK_NUMBER, TOK_SMAC_END, TOK_OTHER, TOK_SMAC_PARAM, + TOK_INTERNAL_STRING +}; + +/* + * Multi-line macro definitions are stored as a linked list of + * these, which is essentially a container to allow several linked + * lists of Tokens. + * + * Note that in this module, linked lists are treated as stacks + * wherever possible. For this reason, Lines are _pushed_ on to the + * `expansion' field in MMacro structures, so that the linked list, + * if walked, would give the macro lines in reverse order; this + * means that we can walk the list when expanding a macro, and thus + * push the lines on to the `expansion' field in _istk_ in reverse + * order (so that when popped back off they are in the right + * order). It may seem cockeyed, and it relies on my design having + * an even number of steps in, but it works... + * + * Some of these structures, rather than being actual lines, are + * markers delimiting the end of the expansion of a given macro. + * This is for use in the cycle-tracking and %rep-handling code. + * Such structures have `finishes' non-NULL, and `first' NULL. All + * others have `finishes' NULL, but `first' may still be NULL if + * the line is blank. + */ +struct Line +{ + Line *next; + MMacro *finishes; + Token *first; +}; + +/* + * To handle an arbitrary level of file inclusion, we maintain a + * stack (ie linked list) of these things. + */ +struct Include +{ + Include *next; + FILE *fp; + Cond *conds; + Line *expansion; + char *fname; + int lineno, lineinc; + MMacro *mstk; /* stack of active macros/reps */ +}; + +/* + * Include search path. This is simply a list of strings which get + * prepended, in turn, to the name of an include file, in an + * attempt to find the file if it's not in the current directory. 
+ */ +struct IncPath +{ + IncPath *next; + char *path; +}; + +/* + * Conditional assembly: we maintain a separate stack of these for + * each level of file inclusion. (The only reason we keep the + * stacks separate is to ensure that a stray `%endif' in a file + * included from within the true branch of a `%if' won't terminate + * it and cause confusion: instead, rightly, it'll cause an error.) + */ +struct Cond +{ + Cond *next; + int state; +}; +enum +{ + /* + * These states are for use just after %if or %elif: IF_TRUE + * means the condition has evaluated to truth so we are + * currently emitting, whereas IF_FALSE means we are not + * currently emitting but will start doing so if a %else comes + * up. In these states, all directives are admissible: %elif, + * %else and %endif. (And of course %if.) + */ + COND_IF_TRUE, COND_IF_FALSE, + /* + * These states come up after a %else: ELSE_TRUE means we're + * emitting, and ELSE_FALSE means we're not. In ELSE_* states, + * any %elif or %else will cause an error. + */ + COND_ELSE_TRUE, COND_ELSE_FALSE, + /* + * This state means that we're not emitting now, and also that + * nothing until %endif will be emitted at all. It's for use in + * two circumstances: (i) when we've had our moment of emission + * and have now started seeing %elifs, and (ii) when the + * condition construct in question is contained within a + * non-emitting branch of a larger condition construct. + */ + COND_NEVER +}; +#define emitting(x) ( (x) == COND_IF_TRUE || (x) == COND_ELSE_TRUE ) + +/* + * These defines are used as the possible return values for do_directive + */ +#define NO_DIRECTIVE_FOUND 0 +#define DIRECTIVE_FOUND 1 + +/* + * Condition codes. Note that we use c_ prefix not C_ because C_ is + * used in nasm.h for the "real" condition codes. At _this_ level, + * we treat CXZ and ECXZ as condition codes, albeit non-invertible + * ones, so we need a different enum... 
+ */ +static const char *conditions[] = { /* kept in the same order as the c_* codes below */ + "a", "ae", "b", "be", "c", "cxz", "e", "ecxz", "g", "ge", "l", "le", + "na", "nae", "nb", "nbe", "nc", "ne", "ng", "nge", "nl", "nle", "no", + "np", "ns", "nz", "o", "p", "pe", "po", "s", "z" +}; +enum +{ + c_A, c_AE, c_B, c_BE, c_C, c_CXZ, c_E, c_ECXZ, c_G, c_GE, c_L, c_LE, + c_NA, c_NAE, c_NB, c_NBE, c_NC, c_NE, c_NG, c_NGE, c_NL, c_NLE, c_NO, + c_NP, c_NS, c_NZ, c_O, c_P, c_PE, c_PO, c_S, c_Z +}; +static int inverse_ccs[] = { /* entry for c_X is the opposite condition; -1 for c_CXZ and c_ECXZ, which have none */ + c_NA, c_NAE, c_NB, c_NBE, c_NC, -1, c_NE, -1, c_NG, c_NGE, c_NL, c_NLE, + c_A, c_AE, c_B, c_BE, c_C, c_E, c_G, c_GE, c_L, c_LE, c_O, c_P, c_S, + c_Z, c_NO, c_NP, c_PO, c_PE, c_NS, c_NZ +}; + +/* + * Directive names. The PP_* enumeration that follows lists the same + * directives in the same order. + */ +static const char *directives[] = { + "%arg", + "%assign", "%clear", "%define", "%elif", "%elifctx", "%elifdef", + "%elifid", "%elifidn", "%elifidni", "%elifmacro", "%elifnctx", "%elifndef", + "%elifnid", "%elifnidn", "%elifnidni", "%elifnmacro", "%elifnnum", "%elifnstr", + "%elifnum", "%elifstr", "%else", "%endif", "%endm", "%endmacro", + "%endrep", "%error", "%exitrep", "%iassign", "%idefine", "%if", + "%ifctx", "%ifdef", "%ifid", "%ifidn", "%ifidni", "%ifmacro", "%ifnctx", + "%ifndef", "%ifnid", "%ifnidn", "%ifnidni", "%ifnmacro", "%ifnnum", + "%ifnstr", "%ifnum", "%ifstr", "%imacro", "%include", + "%ixdefine", "%line", + "%local", + "%macro", "%pop", "%push", "%rep", "%repl", "%rotate", + "%stacksize", + "%strlen", "%substr", "%undef", "%xdefine" +}; +enum +{ + PP_ARG, + PP_ASSIGN, PP_CLEAR, PP_DEFINE, PP_ELIF, PP_ELIFCTX, PP_ELIFDEF, + PP_ELIFID, PP_ELIFIDN, PP_ELIFIDNI, PP_ELIFMACRO, PP_ELIFNCTX, PP_ELIFNDEF, + PP_ELIFNID, PP_ELIFNIDN, PP_ELIFNIDNI, PP_ELIFNMACRO, PP_ELIFNNUM, PP_ELIFNSTR, + PP_ELIFNUM, PP_ELIFSTR, PP_ELSE, PP_ENDIF, PP_ENDM, PP_ENDMACRO, + PP_ENDREP, PP_ERROR, PP_EXITREP, PP_IASSIGN, PP_IDEFINE, PP_IF, + PP_IFCTX, PP_IFDEF, PP_IFID, PP_IFIDN, PP_IFIDNI, PP_IFMACRO, PP_IFNCTX, + PP_IFNDEF, PP_IFNID, PP_IFNIDN, PP_IFNIDNI, PP_IFNMACRO, PP_IFNNUM, + 
PP_IFNSTR, PP_IFNUM, PP_IFSTR, PP_IMACRO, PP_INCLUDE, + PP_IXDEFINE, PP_LINE, + PP_LOCAL, + PP_MACRO, PP_POP, PP_PUSH, PP_REP, PP_REPL, PP_ROTATE, + PP_STACKSIZE, + PP_STRLEN, PP_SUBSTR, PP_UNDEF, PP_XDEFINE +}; + +/* If this is a an IF, ELIF, ELSE or ENDIF keyword */ +static int is_condition(int arg) +{ + return ((arg >= PP_ELIF) && (arg <= PP_ENDIF)) || + ((arg >= PP_IF) && (arg <= PP_IFSTR)); +} + +/* For TASM compatibility we need to be able to recognise TASM compatible + * conditional compilation directives. Using the NASM pre-processor does + * not work, so we look for them specifically from the following list and + * then jam in the equivalent NASM directive into the input stream. + */ + +#ifndef MAX +# define MAX(a,b) ( ((a) > (b)) ? (a) : (b)) +#endif + +enum +{ + TM_ARG, TM_ELIF, TM_ELSE, TM_ENDIF, TM_IF, TM_IFDEF, TM_IFDIFI, + TM_IFNDEF, TM_INCLUDE, TM_LOCAL +}; + +static const char *tasm_directives[] = { + "arg", "elif", "else", "endif", "if", "ifdef", "ifdifi", + "ifndef", "include", "local" +}; + +static int StackSize = 4; +static char *StackPointer = "ebp"; +static int ArgOffset = 8; +static int LocalOffset = 4; + + +static Context *cstk; +static Include *istk; +static IncPath *ipath = NULL; + +static efunc _error; /* Pointer to client-provided error reporting function */ +static evalfunc evaluate; + +static int pass; /* HACK: pass 0 = generate dependencies only */ + +static unsigned long unique; /* unique identifier numbers */ + +static Line *predef = NULL; + +static ListGen *list; + +/* + * The number of hash values we use for the macro lookup tables. + * FIXME: We should *really* be able to configure this at run time, + * or even have the hash table automatically expanding when necessary. + */ +#define NHASH 31 + +/* + * The current set of multi-line macros we have defined. + */ +static MMacro *mmacros[NHASH]; + +/* + * The current set of single-line macros we have defined. 
+ */
+static SMacro *smacros[NHASH];
+
+/*
+ * The multi-line macro we are currently defining, or the %rep
+ * block we are currently reading, if any.
+ */
+static MMacro *defining;
+
+/*
+ * The number of macro parameters to allocate space for at a time.
+ */
+#define PARAM_DELTA 16
+
+/*
+ * The standard macro set: defined as `static char *stdmac[]'. Also
+ * gives our position in the macro set, when we're processing it.
+ */
+#include "macros.c"
+static const char **stdmacpos;
+
+/*
+ * The extra standard macros that come from the object format, if
+ * any.
+ */
+static const char **extrastdmac = NULL;
+int any_extrastdmac;
+
+/*
+ * Tokens are allocated in blocks to improve speed
+ */
+#define TOKEN_BLOCKSIZE 4096
+static Token *freeTokens = NULL;        /* free list consumed by new_Token() */
+struct Blocks {
+    Blocks *next;               /* next bookkeeping node */
+    void *chunk;                /* memory handed out by new_Block() */
+};
+
+/* Static list head; delete_Blocks() takes care never to free it. */
+static Blocks blocks = { NULL, NULL };
+
+/*
+ * Forward declarations.
+ */
+static Token *expand_mmac_params(Token * tline);
+static Token *expand_smacro(Token * tline);
+static Token *expand_id(Token * tline);
+static Context *get_ctx(char *name, int all_contexts);
+static void make_tok_num(Token * tok, long val);
+static void error(int severity, const char *fmt, ...);
+static void *new_Block(size_t size);
+static void delete_Blocks(void);
+static Token *new_Token(Token * next, int type, char *text, int txtlen);
+static Token *delete_Token(Token * t);
+
+/*
+ * Macros for safe checking of token pointers, avoid *(NULL)
+ *
+ * tok_type_(x,t)   x is non-NULL and has type t
+ * skip_white_(x)   advance x past ONE whitespace token, if it is on one
+ * tok_is_(x,v)     x is a TOK_OTHER token whose text equals v
+ * tok_isnt_(x,v)   x is non-NULL and is not such a token
+ *
+ * NOTE: skip_white_ expands to a bare `if' statement, so it must only
+ * be used as a complete statement of its own; placing it directly in
+ * front of an `else' would capture that `else'.
+ */
+#define tok_type_(x,t) ((x) && (x)->type == (t))
+#define skip_white_(x) if (tok_type_((x), TOK_WHITESPACE)) (x)=(x)->next
+#define tok_is_(x,v) (tok_type_((x), TOK_OTHER) && !strcmp((x)->text,(v)))
+#define tok_isnt_(x,v) ((x) && ((x)->type!=TOK_OTHER || strcmp((x)->text,(v))))
+
+/* Handle TASM specific directives, which do not contain a % in
+ * front of them. We do it here because I could not find any other
+ * place to do it for the moment, and it is a hack (ideally it would
+ * be nice to be able to use the NASM pre-processor to do it).
+ */ +static char * +check_tasm_directive(char *line) +{ + int i, j, k, m, len; + char *p = line, *oldline, oldchar; + + /* Skip whitespace */ + while (isspace(*p) && *p != 0) + p++; + + /* Binary search for the directive name */ + i = -1; + j = elements(tasm_directives); + len = 0; + while (!isspace(p[len]) && p[len] != 0) + len++; + if (len) + { + oldchar = p[len]; + p[len] = 0; + while (j - i > 1) + { + k = (j + i) / 2; + m = nasm_stricmp(p, tasm_directives[k]); + if (m == 0) + { + /* We have found a directive, so jam a % in front of it + * so that NASM will then recognise it as one if it's own. + */ + p[len] = oldchar; + len = strlen(p); + oldline = line; + line = nasm_malloc(len + 2); + line[0] = '%'; + if (k == TM_IFDIFI) + { + /* NASM does not recognise IFDIFI, so we convert it to + * %ifdef BOGUS. This is not used in NASM comaptible + * code, but does need to parse for the TASM macro + * package. + */ + strcpy(line + 1, "ifdef BOGUS"); + } + else + { + memcpy(line + 1, p, len + 1); + } + nasm_free(oldline); + return line; + } + else if (m < 0) + { + j = k; + } + else + i = k; + } + p[len] = oldchar; + } + return line; +} + +/* + * The pre-preprocessing stage... This function translates line + * number indications as they emerge from GNU cpp (`# lineno "file" + * flags') into NASM preprocessor line number indications (`%line + * lineno file'). + */ +static char * +prepreproc(char *line) +{ + int lineno, fnlen; + char *fname, *oldline; + + if (line[0] == '#' && line[1] == ' ') + { + oldline = line; + fname = oldline + 2; + lineno = atoi(fname); + fname += strspn(fname, "0123456789 "); + if (*fname == '"') + fname++; + fnlen = strcspn(fname, "\""); + line = nasm_malloc(20 + fnlen); + sprintf(line, "%%line %d %.*s", lineno, fnlen, fname); + nasm_free(oldline); + } + if (tasm_compatible_mode) + return check_tasm_directive(line); + return line; +} + +/* + * The hash function for macro lookups. 
Note that due to some + * macros having case-insensitive names, the hash function must be + * invariant under case changes. We implement this by applying a + * perfectly normal hash function to the uppercase of the string. + */ +static int +hash(char *s) +{ + unsigned int h = 0; + int i = 0; + /* + * Powers of three, mod 31. + */ + static const int multipliers[] = { + 1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, + 30, 28, 22, 4, 12, 5, 15, 14, 11, 2, 6, 18, 23, 7, 21 + }; + + + while (*s) + { + h += multipliers[i] * (unsigned char) (toupper(*s)); + s++; + if (++i >= elements(multipliers)) + i = 0; + } + h %= NHASH; + return h; +} + +/* + * Free a linked list of tokens. + */ +static void +free_tlist(Token * list) +{ + while (list) + { + list = delete_Token(list); + } +} + +/* + * Free a linked list of lines. + */ +static void +free_llist(Line * list) +{ + Line *l; + while (list) + { + l = list; + list = list->next; + free_tlist(l->first); + nasm_free(l); + } +} + +/* + * Free an MMacro + */ +static void +free_mmacro(MMacro * m) +{ + nasm_free(m->name); + free_tlist(m->dlist); + nasm_free(m->defaults); + free_llist(m->expansion); + nasm_free(m); +} + +/* + * Pop the context stack. + */ +static void +ctx_pop(void) +{ + Context *c = cstk; + SMacro *smac, *s; + + cstk = cstk->next; + smac = c->localmac; + while (smac) + { + s = smac; + smac = smac->next; + nasm_free(s->name); + free_tlist(s->expansion); + nasm_free(s); + } + nasm_free(c->name); + nasm_free(c); +} + +#define BUF_DELTA 512 +/* + * Read a line from the top file in istk, handling multiple CR/LFs + * at the end of the line read, and handling spurious ^Zs. Will + * return lines from the standard macro set if this has not already + * been done. 
+ */ +static char * +read_line(void) +{ + char *buffer, *p, *q; + int bufsize, continued_count; + + if (stdmacpos) + { + if (*stdmacpos) + { + char *ret = nasm_strdup(*stdmacpos++); + if (!*stdmacpos && any_extrastdmac) + { + stdmacpos = extrastdmac; + any_extrastdmac = FALSE; + return ret; + } + /* + * Nasty hack: here we push the contents of `predef' on + * to the top-level expansion stack, since this is the + * most convenient way to implement the pre-include and + * pre-define features. + */ + if (!*stdmacpos) + { + Line *pd, *l; + Token *head, **tail, *t; + + for (pd = predef; pd; pd = pd->next) + { + head = NULL; + tail = &head; + for (t = pd->first; t; t = t->next) + { + *tail = new_Token(NULL, t->type, t->text, 0); + tail = &(*tail)->next; + } + l = nasm_malloc(sizeof(Line)); + l->next = istk->expansion; + l->first = head; + l->finishes = FALSE; + istk->expansion = l; + } + } + return ret; + } + else + { + stdmacpos = NULL; + } + } + + bufsize = BUF_DELTA; + buffer = nasm_malloc(BUF_DELTA); + p = buffer; + continued_count = 0; + while (1) + { + q = fgets(p, bufsize - (p - buffer), istk->fp); + if (!q) + break; + p += strlen(p); + if (p > buffer && p[-1] == '\n') + { + /* Convert backslash-CRLF line continuation sequences into + nothing at all (for DOS and Windows) */ + if (((p - 2) > buffer) && (p[-3] == '\\') && (p[-2] == '\r')) { + p -= 3; + *p = 0; + continued_count++; + } + /* Also convert backslash-LF line continuation sequences into + nothing at all (for Unix) */ + else if (((p - 1) > buffer) && (p[-2] == '\\')) { + p -= 2; + *p = 0; + continued_count++; + } + else { + break; + } + } + if (p - buffer > bufsize - 10) + { + long offset = p - buffer; + bufsize += BUF_DELTA; + buffer = nasm_realloc(buffer, bufsize); + p = buffer + offset; /* prevent stale-pointer problems */ + } + } + + if (!q && p == buffer) + { + nasm_free(buffer); + return NULL; + } + + src_set_linnum(src_get_linnum() + istk->lineinc + (continued_count * istk->lineinc)); + + /* + * 
Play safe: remove CRs as well as LFs, if any of either are + * present at the end of the line. + */ + while (--p >= buffer && (*p == '\n' || *p == '\r')) + *p = '\0'; + + /* + * Handle spurious ^Z, which may be inserted into source files + * by some file transfer utilities. + */ + buffer[strcspn(buffer, "\032")] = '\0'; + + list->line(LIST_READ, buffer); + + return buffer; +} + +/* + * Tokenise a line of text. This is a very simple process since we + * don't need to parse the value out of e.g. numeric tokens: we + * simply split one string into many. + */ +static Token * +tokenise(char *line) +{ + char *p = line; + int type; + Token *list = NULL; + Token *t, **tail = &list; + + while (*line) + { + p = line; + if (*p == '%') + { + p++; + if ( isdigit(*p) || + ((*p == '-' || *p == '+') && isdigit(p[1])) || + ((*p == '+') && (isspace(p[1]) || !p[1]))) + { + do + { + p++; + } + while (isdigit(*p)); + type = TOK_PREPROC_ID; + } + else if (*p == '{') + { + p++; + while (*p && *p != '}') + { + p[-1] = *p; + p++; + } + p[-1] = '\0'; + if (*p) + p++; + type = TOK_PREPROC_ID; + } + else if (isidchar(*p) || + ((*p == '!' || *p == '%' || *p == '$') && + isidchar(p[1]))) + { + do + { + p++; + } + while (isidchar(*p)); + type = TOK_PREPROC_ID; + } + else + { + type = TOK_OTHER; + if (*p == '%') + p++; + } + } + else if (isidstart(*p) || (*p == '$' && isidstart(p[1]))) + { + type = TOK_ID; + p++; + while (*p && isidchar(*p)) + p++; + } + else if (*p == '\'' || *p == '"') + { + /* + * A string token. + */ + char c = *p; + p++; + type = TOK_STRING; + while (*p && *p != c) + p++; + if (*p) + { + p++; + } + else + { + error(ERR_WARNING, "unterminated string"); + } + } + else if (isnumstart(*p)) + { + /* + * A number token. 
+ */ + type = TOK_NUMBER; + p++; + while (*p && isnumchar(*p)) + p++; + } + else if (isspace(*p)) + { + type = TOK_WHITESPACE; + p++; + while (*p && isspace(*p)) + p++; + /* + * Whitespace just before end-of-line is discarded by + * pretending it's a comment; whitespace just before a + * comment gets lumped into the comment. + */ + if (!*p || *p == ';') + { + type = TOK_COMMENT; + while (*p) + p++; + } + } + else if (*p == ';') + { + type = TOK_COMMENT; + while (*p) + p++; + } + else + { + /* + * Anything else is an operator of some kind. We check + * for all the double-character operators (>>, <<, //, + * %%, <=, >=, ==, !=, <>, &&, ||, ^^), but anything + * else is a single-character operator. + */ + type = TOK_OTHER; + if ((p[0] == '>' && p[1] == '>') || + (p[0] == '<' && p[1] == '<') || + (p[0] == '/' && p[1] == '/') || + (p[0] == '<' && p[1] == '=') || + (p[0] == '>' && p[1] == '=') || + (p[0] == '=' && p[1] == '=') || + (p[0] == '!' && p[1] == '=') || + (p[0] == '<' && p[1] == '>') || + (p[0] == '&' && p[1] == '&') || + (p[0] == '|' && p[1] == '|') || + (p[0] == '^' && p[1] == '^')) + { + p++; + } + p++; + } + if (type != TOK_COMMENT) + { + *tail = t = new_Token(NULL, type, line, p - line); + tail = &t->next; + } + line = p; + } + return list; +} + +/* + * this function allocates a new managed block of memory and + * returns a pointer to the block. The managed blocks are + * deleted only all at once by the delete_Blocks function. 
+ */ +static void * +new_Block(size_t size) +{ + Blocks *b = &blocks; + + /* first, get to the end of the linked list */ + while (b->next) + b = b->next; + /* now allocate the requested chunk */ + b->chunk = nasm_malloc(size); + + /* now allocate a new block for the next request */ + b->next = nasm_malloc(sizeof(Blocks)); + /* and initialize the contents of the new block */ + b->next->next = NULL; + b->next->chunk = NULL; + return b->chunk; +} + +/* + * this function deletes all managed blocks of memory + */ +static void +delete_Blocks(void) +{ + Blocks *a,*b = &blocks; + + /* + * keep in mind that the first block, pointed to by blocks + * is a static and not dynamically allocated, so we don't + * free it. + */ + while (b) + { + if (b->chunk) + nasm_free(b->chunk); + a = b; + b = b->next; + if (a != &blocks) + nasm_free(a); + } +} + +/* + * this function creates a new Token and passes a pointer to it + * back to the caller. It sets the type and text elements, and + * also the mac and next elements to NULL. + */ +static Token * +new_Token(Token * next, int type, char *text, int txtlen) +{ + Token *t; + int i; + + if (freeTokens == NULL) + { + freeTokens = (Token *)new_Block(TOKEN_BLOCKSIZE * sizeof(Token)); + for (i = 0; i < TOKEN_BLOCKSIZE - 1; i++) + freeTokens[i].next = &freeTokens[i + 1]; + freeTokens[i].next = NULL; + } + t = freeTokens; + freeTokens = t->next; + t->next = next; + t->mac = NULL; + t->type = type; + if (type == TOK_WHITESPACE || text == NULL) + { + t->text = NULL; + } + else + { + if (txtlen == 0) + txtlen = strlen(text); + t->text = nasm_malloc(1 + txtlen); + strncpy(t->text, text, txtlen); + t->text[txtlen] = '\0'; + } + return t; +} + +static Token * +delete_Token(Token * t) +{ + Token *next = t->next; + nasm_free(t->text); + t->next = freeTokens; + freeTokens = t; + return next; +} + +/* + * Convert a line of tokens back into text. 
+ * If expand_locals is not zero, identifiers of the form "%$*xxx" + * will be transformed into ..@ctxnum.xxx + */ +static char * +detoken(Token * tlist, int expand_locals) +{ + Token *t; + int len; + char *line, *p; + + len = 0; + for (t = tlist; t; t = t->next) + { + if (t->type == TOK_PREPROC_ID && t->text[1] == '!') + { + char *p = getenv(t->text + 2); + nasm_free(t->text); + if (p) + t->text = nasm_strdup(p); + else + t->text = NULL; + } + /* Expand local macros here and not during preprocessing */ + if (expand_locals && + t->type == TOK_PREPROC_ID && t->text && + t->text[0] == '%' && t->text[1] == '$') + { + Context *ctx = get_ctx(t->text, FALSE); + if (ctx) + { + char buffer[40]; + char *p, *q = t->text + 2; + + q += strspn(q, "$"); + sprintf(buffer, "..@%lu.", ctx->number); + p = nasm_strcat(buffer, q); + nasm_free(t->text); + t->text = p; + } + } + if (t->type == TOK_WHITESPACE) + { + len++; + } + else if (t->text) + { + len += strlen(t->text); + } + } + p = line = nasm_malloc(len + 1); + for (t = tlist; t; t = t->next) + { + if (t->type == TOK_WHITESPACE) + { + *p = ' '; + p++; + *p = '\0'; + } + else if (t->text) + { + strcpy(p, t->text); + p += strlen(p); + } + } + *p = '\0'; + return line; +} + +/* + * A scanner, suitable for use by the expression evaluator, which + * operates on a line of Tokens. Expects a pointer to a pointer to + * the first token in the line to be passed in as its private_data + * field. + */ +static int +ppscan(void *private_data, struct tokenval *tokval) +{ + Token **tlineptr = private_data; + Token *tline; + + do + { + tline = *tlineptr; + *tlineptr = tline ? 
tline->next : NULL; + } + while (tline && (tline->type == TOK_WHITESPACE || + tline->type == TOK_COMMENT)); + + if (!tline) + return tokval->t_type = TOKEN_EOS; + + if (tline->text[0] == '$' && !tline->text[1]) + return tokval->t_type = TOKEN_HERE; + if (tline->text[0] == '$' && tline->text[1] == '$' && !tline->text[2]) + return tokval->t_type = TOKEN_BASE; + + if (tline->type == TOK_ID) + { + tokval->t_charptr = tline->text; + if (tline->text[0] == '$') + { + tokval->t_charptr++; + return tokval->t_type = TOKEN_ID; + } + + /* + * This is the only special case we actually need to worry + * about in this restricted context. + */ + if (!nasm_stricmp(tline->text, "seg")) + return tokval->t_type = TOKEN_SEG; + + return tokval->t_type = TOKEN_ID; + } + + if (tline->type == TOK_NUMBER) + { + int rn_error; + + tokval->t_integer = readnum(tline->text, &rn_error); + if (rn_error) + return tokval->t_type = TOKEN_ERRNUM; + tokval->t_charptr = NULL; + return tokval->t_type = TOKEN_NUM; + } + + if (tline->type == TOK_STRING) + { + int rn_warn; + char q, *r; + int l; + + r = tline->text; + q = *r++; + l = strlen(r); + + if (l == 0 || r[l - 1] != q) + return tokval->t_type = TOKEN_ERRNUM; + tokval->t_integer = readstrnum(r, l - 1, &rn_warn); + if (rn_warn) + error(ERR_WARNING | ERR_PASS1, "character constant too long"); + tokval->t_charptr = NULL; + return tokval->t_type = TOKEN_NUM; + } + + if (tline->type == TOK_OTHER) + { + if (!strcmp(tline->text, "<<")) + return tokval->t_type = TOKEN_SHL; + if (!strcmp(tline->text, ">>")) + return tokval->t_type = TOKEN_SHR; + if (!strcmp(tline->text, "//")) + return tokval->t_type = TOKEN_SDIV; + if (!strcmp(tline->text, "%%")) + return tokval->t_type = TOKEN_SMOD; + if (!strcmp(tline->text, "==")) + return tokval->t_type = TOKEN_EQ; + if (!strcmp(tline->text, "<>")) + return tokval->t_type = TOKEN_NE; + if (!strcmp(tline->text, "!=")) + return tokval->t_type = TOKEN_NE; + if (!strcmp(tline->text, "<=")) + return tokval->t_type = TOKEN_LE; 
+ if (!strcmp(tline->text, ">=")) + return tokval->t_type = TOKEN_GE; + if (!strcmp(tline->text, "&&")) + return tokval->t_type = TOKEN_DBL_AND; + if (!strcmp(tline->text, "^^")) + return tokval->t_type = TOKEN_DBL_XOR; + if (!strcmp(tline->text, "||")) + return tokval->t_type = TOKEN_DBL_OR; + } + + /* + * We have no other options: just return the first character of + * the token text. + */ + return tokval->t_type = tline->text[0]; +} + +/* + * Compare a string to the name of an existing macro; this is a + * simple wrapper which calls either strcmp or nasm_stricmp + * depending on the value of the `casesense' parameter. + */ +static int +mstrcmp(char *p, char *q, int casesense) +{ + return casesense ? strcmp(p, q) : nasm_stricmp(p, q); +} + +/* + * Return the Context structure associated with a %$ token. Return + * NULL, having _already_ reported an error condition, if the + * context stack isn't deep enough for the supplied number of $ + * signs. + * If all_contexts == TRUE, contexts that enclose current are + * also scanned for such smacro, until it is found; if not - + * only the context that directly results from the number of $'s + * in variable's name. + */ +static Context * +get_ctx(char *name, int all_contexts) +{ + Context *ctx; + SMacro *m; + int i; + + if (!name || name[0] != '%' || name[1] != '$') + return NULL; + + if (!cstk) + { + error(ERR_NONFATAL, "`%s': context stack is empty", name); + return NULL; + } + + for (i = strspn(name + 2, "$"), ctx = cstk; (i > 0) && ctx; i--) + { + ctx = ctx->next; +/* i--; Lino - 02/25/02 */ + } + if (!ctx) + { + error(ERR_NONFATAL, "`%s': context stack is only" + " %d level%s deep", name, i - 1, (i == 2 ? 
"" : "s")); + return NULL; + } + if (!all_contexts) + return ctx; + + do + { + /* Search for this smacro in found context */ + m = ctx->localmac; + while (m) + { + if (!mstrcmp(m->name, name, m->casesense)) + return ctx; + m = m->next; + } + ctx = ctx->next; + } + while (ctx); + return NULL; +} + +/* Add a slash to the end of a path if it is missing. We use the + * forward slash to make it compatible with Unix systems. + */ +static void +backslash(char *s) +{ + int pos = strlen(s); + if (s[pos - 1] != '\\' && s[pos - 1] != '/') + { + s[pos] = '/'; + s[pos + 1] = '\0'; + } +} + +/* + * Open an include file. This routine must always return a valid + * file pointer if it returns - it's responsible for throwing an + * ERR_FATAL and bombing out completely if not. It should also try + * the include path one by one until it finds the file or reaches + * the end of the path. + */ +static FILE * +inc_fopen(char *file) +{ + FILE *fp; + char *prefix = "", *combine; + IncPath *ip = ipath; + static int namelen = 0; + int len = strlen(file); + + while (1) + { + combine = nasm_malloc(strlen(prefix) + 1 + len + 1); + strcpy(combine, prefix); + if (prefix[0] != 0) + backslash(combine); + strcat(combine, file); + fp = fopen(combine, "r"); + if (pass == 0 && fp) + { + namelen += strlen(combine) + 1; + if (namelen > 62) + { + printf(" \\\n "); + namelen = 2; + } + printf(" %s", combine); + } + nasm_free(combine); + if (fp) + return fp; + if (!ip) + break; + prefix = ip->path; + ip = ip->next; + } + + error(ERR_FATAL, "unable to open include file `%s'", file); + return NULL; /* never reached - placate compilers */ +} + +/* + * Determine if we should warn on defining a single-line macro of + * name `name', with `nparam' parameters. If nparam is 0 or -1, will + * return TRUE if _any_ single-line macro of that name is defined. + * Otherwise, will return TRUE if a single-line macro with either + * `nparam' or no parameters is defined. 
+ * + * If a macro with precisely the right number of parameters is + * defined, or nparam is -1, the address of the definition structure + * will be returned in `defn'; otherwise NULL will be returned. If `defn' + * is NULL, no action will be taken regarding its contents, and no + * error will occur. + * + * Note that this is also called with nparam zero to resolve + * `ifdef'. + * + * If you already know which context macro belongs to, you can pass + * the context pointer as first parameter; if you won't but name begins + * with %$ the context will be automatically computed. If all_contexts + * is true, macro will be searched in outer contexts as well. + */ +static int +smacro_defined(Context * ctx, char *name, int nparam, SMacro ** defn, + int nocase) +{ + SMacro *m; + + if (ctx) + m = ctx->localmac; + else if (name[0] == '%' && name[1] == '$') + { + if (cstk) + ctx = get_ctx(name, FALSE); + if (!ctx) + return FALSE; /* got to return _something_ */ + m = ctx->localmac; + } + else + m = smacros[hash(name)]; + + while (m) + { + if (!mstrcmp(m->name, name, m->casesense && nocase) && + (nparam <= 0 || m->nparam == 0 || nparam == m->nparam)) + { + if (defn) + { + if (nparam == m->nparam || nparam == -1) + *defn = m; + else + *defn = NULL; + } + return TRUE; + } + m = m->next; + } + + return FALSE; +} + +/* + * Count and mark off the parameters in a multi-line macro call. + * This is called both from within the multi-line macro expansion + * code, and also to mark off the default parameters when provided + * in a %macro definition line. + */ +static void +count_mmac_params(Token * t, int *nparam, Token *** params) +{ + int paramsize, brace; + + *nparam = paramsize = 0; + *params = NULL; + while (t) + { + if (*nparam >= paramsize) + { + paramsize += PARAM_DELTA; + *params = nasm_realloc(*params, sizeof(**params) * paramsize); + } + skip_white_(t); + brace = FALSE; + if (tok_is_(t, "{")) + brace = TRUE; + (*params)[(*nparam)++] = t; + while (tok_isnt_(t, brace ? 
"}" : ",")) + t = t->next; + if (t) + { /* got a comma/brace */ + t = t->next; + if (brace) + { + /* + * Now we've found the closing brace, look further + * for the comma. + */ + skip_white_(t); + if (tok_isnt_(t, ",")) + { + error(ERR_NONFATAL, + "braces do not enclose all of macro parameter"); + while (tok_isnt_(t, ",")) + t = t->next; + } + if (t) + t = t->next; /* eat the comma */ + } + } + } +} + +/* + * Determine whether one of the various `if' conditions is true or + * not. + * + * We must free the tline we get passed. + */ +static int +if_condition(Token * tline, int i) +{ + int j, casesense; + Token *t, *tt, **tptr, *origline; + struct tokenval tokval; + expr *evalresult; + + origline = tline; + + switch (i) + { + case PP_IFCTX: + case PP_ELIFCTX: + case PP_IFNCTX: + case PP_ELIFNCTX: + j = FALSE; /* have we matched yet? */ + while (cstk && tline) + { + skip_white_(tline); + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, + "`%s' expects context identifiers", + directives[i]); + free_tlist(origline); + return -1; + } + if (!nasm_stricmp(tline->text, cstk->name)) + j = TRUE; + tline = tline->next; + } + if (i == PP_IFNCTX || i == PP_ELIFNCTX) + j = !j; + free_tlist(origline); + return j; + + case PP_IFDEF: + case PP_ELIFDEF: + case PP_IFNDEF: + case PP_ELIFNDEF: + j = FALSE; /* have we matched yet? 
*/ + while (tline) + { + skip_white_(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, + "`%s' expects macro identifiers", + directives[i]); + free_tlist(origline); + return -1; + } + if (smacro_defined(NULL, tline->text, 0, NULL, 1)) + j = TRUE; + tline = tline->next; + } + if (i == PP_IFNDEF || i == PP_ELIFNDEF) + j = !j; + free_tlist(origline); + return j; + + case PP_IFIDN: + case PP_ELIFIDN: + case PP_IFNIDN: + case PP_ELIFNIDN: + case PP_IFIDNI: + case PP_ELIFIDNI: + case PP_IFNIDNI: + case PP_ELIFNIDNI: + tline = expand_smacro(tline); + t = tt = tline; + while (tok_isnt_(tt, ",")) + tt = tt->next; + if (!tt) + { + error(ERR_NONFATAL, + "`%s' expects two comma-separated arguments", + directives[i]); + free_tlist(tline); + return -1; + } + tt = tt->next; + casesense = (i == PP_IFIDN || i == PP_ELIFIDN || + i == PP_IFNIDN || i == PP_ELIFNIDN); + j = TRUE; /* assume equality unless proved not */ + while ((t->type != TOK_OTHER || strcmp(t->text, ",")) && tt) + { + if (tt->type == TOK_OTHER && !strcmp(tt->text, ",")) + { + error(ERR_NONFATAL, "`%s': more than one comma on line", + directives[i]); + free_tlist(tline); + return -1; + } + if (t->type == TOK_WHITESPACE) + { + t = t->next; + continue; + } + else if (tt->type == TOK_WHITESPACE) + { + tt = tt->next; + continue; + } + else if (tt->type != t->type || + mstrcmp(tt->text, t->text, casesense)) + { + j = FALSE; /* found mismatching tokens */ + break; + } + else + { + t = t->next; + tt = tt->next; + continue; + } + } + if ((t->type != TOK_OTHER || strcmp(t->text, ",")) || tt) + j = FALSE; /* trailing gunk on one end or other */ + if (i == PP_IFNIDN || i == PP_ELIFNIDN || + i == PP_IFNIDNI || i == PP_ELIFNIDNI) + j = !j; + free_tlist(tline); + return j; + + case PP_IFMACRO: + case PP_ELIFMACRO: + case PP_IFNMACRO: + case PP_ELIFNMACRO: + { + int found = 0; + MMacro searching, *mmac; + + tline = tline->next; + 
skip_white_(tline); + tline = expand_id(tline); + if (!tok_type_(tline, TOK_ID)) + { + error(ERR_NONFATAL, + "`%s' expects a macro name", + directives[i]); + return -1; + } + searching.name = nasm_strdup(tline->text); + searching.casesense = (i == PP_MACRO); + searching.plus = FALSE; + searching.nolist = FALSE; + searching.in_progress = FALSE; + searching.rep_nest = NULL; + searching.nparam_min = 0; + searching.nparam_max = INT_MAX; + tline = expand_smacro(tline->next); + skip_white_(tline); + if (!tline) + { + } else if (!tok_type_(tline, TOK_NUMBER)) + { + error(ERR_NONFATAL, + "`%s' expects a parameter count or nothing", + directives[i]); + } + else + { + searching.nparam_min = searching.nparam_max = + readnum(tline->text, &j); + if (j) + error(ERR_NONFATAL, + "unable to parse parameter count `%s'", + tline->text); + } + if (tline && tok_is_(tline->next, "-")) + { + tline = tline->next->next; + if (tok_is_(tline, "*")) + searching.nparam_max = INT_MAX; + else if (!tok_type_(tline, TOK_NUMBER)) + error(ERR_NONFATAL, + "`%s' expects a parameter count after `-'", + directives[i]); + else + { + searching.nparam_max = readnum(tline->text, &j); + if (j) + error(ERR_NONFATAL, + "unable to parse parameter count `%s'", + tline->text); + if (searching.nparam_min > searching.nparam_max) + error(ERR_NONFATAL, + "minimum parameter count exceeds maximum"); + } + } + if (tline && tok_is_(tline->next, "+")) + { + tline = tline->next; + searching.plus = TRUE; + } + mmac = mmacros[hash(searching.name)]; + while (mmac) + { + if (!strcmp(mmac->name, searching.name) && + (mmac->nparam_min <= searching.nparam_max + || searching.plus) + && (searching.nparam_min <= mmac->nparam_max + || mmac->plus)) + { + found = TRUE; + break; + } + mmac = mmac->next; + } + nasm_free(searching.name); + free_tlist(origline); + if (i == PP_IFNMACRO || i == PP_ELIFNMACRO) + found = !found; + return found; + } + + case PP_IFID: + case PP_ELIFID: + case PP_IFNID: + case PP_ELIFNID: + case PP_IFNUM: + case 
PP_ELIFNUM: + case PP_IFNNUM: + case PP_ELIFNNUM: + case PP_IFSTR: + case PP_ELIFSTR: + case PP_IFNSTR: + case PP_ELIFNSTR: + tline = expand_smacro(tline); + t = tline; + while (tok_type_(t, TOK_WHITESPACE)) + t = t->next; + j = FALSE; /* placate optimiser */ + if (t) + switch (i) + { + case PP_IFID: + case PP_ELIFID: + case PP_IFNID: + case PP_ELIFNID: + j = (t->type == TOK_ID); + break; + case PP_IFNUM: + case PP_ELIFNUM: + case PP_IFNNUM: + case PP_ELIFNNUM: + j = (t->type == TOK_NUMBER); + break; + case PP_IFSTR: + case PP_ELIFSTR: + case PP_IFNSTR: + case PP_ELIFNSTR: + j = (t->type == TOK_STRING); + break; + } + if (i == PP_IFNID || i == PP_ELIFNID || + i == PP_IFNNUM || i == PP_ELIFNNUM || + i == PP_IFNSTR || i == PP_ELIFNSTR) + j = !j; + free_tlist(tline); + return j; + + case PP_IF: + case PP_ELIF: + t = tline = expand_smacro(tline); + tptr = &t; + tokval.t_type = TOKEN_INVALID; + evalresult = evaluate(ppscan, tptr, &tokval, + NULL, pass | CRITICAL, error, NULL); + free_tlist(tline); + if (!evalresult) + return -1; + if (tokval.t_type) + error(ERR_WARNING, + "trailing garbage after expression ignored"); + if (!is_simple(evalresult)) + { + error(ERR_NONFATAL, + "non-constant value given to `%s'", directives[i]); + return -1; + } + return reloc_value(evalresult) != 0; + + default: + error(ERR_FATAL, + "preprocessor directive `%s' not yet implemented", + directives[i]); + free_tlist(origline); + return -1; /* yeah, right */ + } +} + +/* + * Expand macros in a string. Used in %error and %include directives. + * First tokenise the string, apply "expand_smacro" and then de-tokenise back. + * The returned variable should ALWAYS be freed after usage. + */ +void +expand_macros_in_string(char **p) +{ + Token *line = tokenise(*p); + line = expand_smacro(line); + *p = detoken(line, FALSE); +} + +/** + * find and process preprocessor directive in passed line + * Find out if a line contains a preprocessor directive, and deal + * with it if so. 
+ * + * If a directive _is_ found, it is the responsibility of this routine + * (and not the caller) to free_tlist() the line. + * + * @param tline a pointer to the current tokeninzed line linked list + * @return DIRECTIVE_FOUND or NO_DIRECTIVE_FOUND + * + */ +static int +do_directive(Token * tline) +{ + int i, j, k, m, nparam, nolist; + int offset; + char *p, *mname; + Include *inc; + Context *ctx; + Cond *cond; + SMacro *smac, **smhead; + MMacro *mmac; + Token *t, *tt, *param_start, *macro_start, *last, **tptr, *origline; + Line *l; + struct tokenval tokval; + expr *evalresult; + MMacro *tmp_defining; /* Used when manipulating rep_nest */ + + origline = tline; + + skip_white_(tline); + if (!tok_type_(tline, TOK_PREPROC_ID) || + (tline->text[1] == '%' || tline->text[1] == '$' + || tline->text[1] == '!')) + return NO_DIRECTIVE_FOUND; + + i = -1; + j = elements(directives); + while (j - i > 1) + { + k = (j + i) / 2; + m = nasm_stricmp(tline->text, directives[k]); + if (m == 0) { + if (tasm_compatible_mode) { + i = k; + j = -2; + } else if (k != PP_ARG && k != PP_LOCAL && k != PP_STACKSIZE) { + i = k; + j = -2; + } + break; + } + else if (m < 0) { + j = k; + } + else + i = k; + } + + /* + * If we're in a non-emitting branch of a condition construct, + * or walking to the end of an already terminated %rep block, + * we should ignore all directives except for condition + * directives. + */ + if (((istk->conds && !emitting(istk->conds->state)) || + (istk->mstk && !istk->mstk->in_progress)) && + !is_condition(i)) + { + return NO_DIRECTIVE_FOUND; + } + + /* + * If we're defining a macro or reading a %rep block, we should + * ignore all directives except for %macro/%imacro (which + * generate an error), %endm/%endmacro, and (only if we're in a + * %rep block) %endrep. If we're in a %rep block, another %rep + * causes an error, so should be let through. 
+ */ + if (defining && i != PP_MACRO && i != PP_IMACRO && + i != PP_ENDMACRO && i != PP_ENDM && + (defining->name || (i != PP_ENDREP && i != PP_REP))) + { + return NO_DIRECTIVE_FOUND; + } + + if (j != -2) + { + error(ERR_NONFATAL, "unknown preprocessor directive `%s'", + tline->text); + return NO_DIRECTIVE_FOUND; /* didn't get it */ + } + + switch (i) + { + case PP_STACKSIZE: + /* Directive to tell NASM what the default stack size is. The + * default is for a 16-bit stack, and this can be overriden with + * %stacksize large. + * the following form: + * + * ARG arg1:WORD, arg2:DWORD, arg4:QWORD + */ + tline = tline->next; + if (tline && tline->type == TOK_WHITESPACE) + tline = tline->next; + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, "`%%stacksize' missing size parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + if (nasm_stricmp(tline->text, "flat") == 0) + { + /* All subsequent ARG directives are for a 32-bit stack */ + StackSize = 4; + StackPointer = "ebp"; + ArgOffset = 8; + LocalOffset = 4; + } + else if (nasm_stricmp(tline->text, "large") == 0) + { + /* All subsequent ARG directives are for a 16-bit stack, + * far function call. + */ + StackSize = 2; + StackPointer = "bp"; + ArgOffset = 4; + LocalOffset = 2; + } + else if (nasm_stricmp(tline->text, "small") == 0) + { + /* All subsequent ARG directives are for a 16-bit stack, + * far function call. We don't support near functions. 
+ */ + StackSize = 2; + StackPointer = "bp"; + ArgOffset = 6; + LocalOffset = 2; + } + else + { + error(ERR_NONFATAL, "`%%stacksize' invalid size type"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_ARG: + /* TASM like ARG directive to define arguments to functions, in + * the following form: + * + * ARG arg1:WORD, arg2:DWORD, arg4:QWORD + */ + offset = ArgOffset; + do + { + char *arg, directive[256]; + int size = StackSize; + + /* Find the argument name */ + tline = tline->next; + if (tline && tline->type == TOK_WHITESPACE) + tline = tline->next; + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, "`%%arg' missing argument parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + arg = tline->text; + + /* Find the argument size type */ + tline = tline->next; + if (!tline || tline->type != TOK_OTHER + || tline->text[0] != ':') + { + error(ERR_NONFATAL, + "Syntax error processing `%%arg' directive"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + tline = tline->next; + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, + "`%%arg' missing size type parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + /* Allow macro expansion of type parameter */ + tt = tokenise(tline->text); + tt = expand_smacro(tt); + if (nasm_stricmp(tt->text, "byte") == 0) + { + size = MAX(StackSize, 1); + } + else if (nasm_stricmp(tt->text, "word") == 0) + { + size = MAX(StackSize, 2); + } + else if (nasm_stricmp(tt->text, "dword") == 0) + { + size = MAX(StackSize, 4); + } + else if (nasm_stricmp(tt->text, "qword") == 0) + { + size = MAX(StackSize, 8); + } + else if (nasm_stricmp(tt->text, "tword") == 0) + { + size = MAX(StackSize, 10); + } + else + { + error(ERR_NONFATAL, + "Invalid size type for `%%arg' missing directive"); + free_tlist(tt); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + free_tlist(tt); + + /* Now define the macro for the argument */ + 
sprintf(directive, "%%define %s (%s+%d)", arg, StackPointer, + offset); + do_directive(tokenise(directive)); + offset += size; + + /* Move to the next argument in the list */ + tline = tline->next; + if (tline && tline->type == TOK_WHITESPACE) + tline = tline->next; + } + while (tline && tline->type == TOK_OTHER + && tline->text[0] == ','); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_LOCAL: + /* TASM like LOCAL directive to define local variables for a + * function, in the following form: + * + * LOCAL local1:WORD, local2:DWORD, local4:QWORD = LocalSize + * + * The '= LocalSize' at the end is ignored by NASM, but is + * required by TASM to define the local parameter size (and used + * by the TASM macro package). + */ + offset = LocalOffset; + do + { + char *local, directive[256]; + int size = StackSize; + + /* Find the argument name */ + tline = tline->next; + if (tline && tline->type == TOK_WHITESPACE) + tline = tline->next; + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, + "`%%local' missing argument parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + local = tline->text; + + /* Find the argument size type */ + tline = tline->next; + if (!tline || tline->type != TOK_OTHER + || tline->text[0] != ':') + { + error(ERR_NONFATAL, + "Syntax error processing `%%local' directive"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + tline = tline->next; + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, + "`%%local' missing size type parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + /* Allow macro expansion of type parameter */ + tt = tokenise(tline->text); + tt = expand_smacro(tt); + if (nasm_stricmp(tt->text, "byte") == 0) + { + size = MAX(StackSize, 1); + } + else if (nasm_stricmp(tt->text, "word") == 0) + { + size = MAX(StackSize, 2); + } + else if (nasm_stricmp(tt->text, "dword") == 0) + { + size = MAX(StackSize, 4); + } + else if (nasm_stricmp(tt->text, "qword") == 0) + { + 
size = MAX(StackSize, 8); + } + else if (nasm_stricmp(tt->text, "tword") == 0) + { + size = MAX(StackSize, 10); + } + else + { + error(ERR_NONFATAL, + "Invalid size type for `%%local' missing directive"); + free_tlist(tt); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + free_tlist(tt); + + /* Now define the macro for the argument */ + sprintf(directive, "%%define %s (%s-%d)", local, StackPointer, + offset); + do_directive(tokenise(directive)); + offset += size; + + /* Now define the assign to setup the enter_c macro correctly */ + sprintf(directive, "%%assign %%$localsize %%$localsize+%d", + size); + do_directive(tokenise(directive)); + + /* Move to the next argument in the list */ + tline = tline->next; + if (tline && tline->type == TOK_WHITESPACE) + tline = tline->next; + } + while (tline && tline->type == TOK_OTHER + && tline->text[0] == ','); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_CLEAR: + if (tline->next) + error(ERR_WARNING, + "trailing garbage after `%%clear' ignored"); + for (j = 0; j < NHASH; j++) + { + while (mmacros[j]) + { + MMacro *m = mmacros[j]; + mmacros[j] = m->next; + free_mmacro(m); + } + while (smacros[j]) + { + SMacro *s = smacros[j]; + smacros[j] = smacros[j]->next; + nasm_free(s->name); + free_tlist(s->expansion); + nasm_free(s); + } + } + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_INCLUDE: + tline = tline->next; + skip_white_(tline); + if (!tline || (tline->type != TOK_STRING && + tline->type != TOK_INTERNAL_STRING)) + { + error(ERR_NONFATAL, "`%%include' expects a file name"); + free_tlist(origline); + return DIRECTIVE_FOUND; /* but we did _something_ */ + } + if (tline->next) + error(ERR_WARNING, + "trailing garbage after `%%include' ignored"); + if (tline->type != TOK_INTERNAL_STRING) + { + p = tline->text + 1; /* point past the quote to the name */ + p[strlen(p) - 1] = '\0'; /* remove the trailing quote */ + } + else + p = tline->text; /* internal_string is easier */ + 
expand_macros_in_string(&p); + inc = nasm_malloc(sizeof(Include)); + inc->next = istk; + inc->conds = NULL; + inc->fp = inc_fopen(p); + inc->fname = src_set_fname(p); + inc->lineno = src_set_linnum(0); + inc->lineinc = 1; + inc->expansion = NULL; + inc->mstk = NULL; + istk = inc; + list->uplevel(LIST_INCLUDE); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_PUSH: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tok_type_(tline, TOK_ID)) + { + error(ERR_NONFATAL, "`%%push' expects a context identifier"); + free_tlist(origline); + return DIRECTIVE_FOUND; /* but we did _something_ */ + } + if (tline->next) + error(ERR_WARNING, "trailing garbage after `%%push' ignored"); + ctx = nasm_malloc(sizeof(Context)); + ctx->next = cstk; + ctx->localmac = NULL; + ctx->name = nasm_strdup(tline->text); + ctx->number = unique++; + cstk = ctx; + free_tlist(origline); + break; + + case PP_REPL: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tok_type_(tline, TOK_ID)) + { + error(ERR_NONFATAL, "`%%repl' expects a context identifier"); + free_tlist(origline); + return DIRECTIVE_FOUND; /* but we did _something_ */ + } + if (tline->next) + error(ERR_WARNING, "trailing garbage after `%%repl' ignored"); + if (!cstk) + error(ERR_NONFATAL, "`%%repl': context stack is empty"); + else + { + nasm_free(cstk->name); + cstk->name = nasm_strdup(tline->text); + } + free_tlist(origline); + break; + + case PP_POP: + if (tline->next) + error(ERR_WARNING, "trailing garbage after `%%pop' ignored"); + if (!cstk) + error(ERR_NONFATAL, + "`%%pop': context stack is already empty"); + else + ctx_pop(); + free_tlist(origline); + break; + + case PP_ERROR: + tline->next = expand_smacro(tline->next); + tline = tline->next; + skip_white_(tline); + if (tok_type_(tline, TOK_STRING)) + { + p = tline->text + 1; /* point past the quote to the name */ + p[strlen(p) - 1] = '\0'; /* remove the trailing quote */ + expand_macros_in_string(&p); + 
error(ERR_NONFATAL, "%s", p); + nasm_free(p); + } + else + { + p = detoken(tline, FALSE); + error(ERR_WARNING, "%s", p); + nasm_free(p); + } + free_tlist(origline); + break; + + case PP_IF: + case PP_IFCTX: + case PP_IFDEF: + case PP_IFID: + case PP_IFIDN: + case PP_IFIDNI: + case PP_IFMACRO: + case PP_IFNCTX: + case PP_IFNDEF: + case PP_IFNID: + case PP_IFNIDN: + case PP_IFNIDNI: + case PP_IFNMACRO: + case PP_IFNNUM: + case PP_IFNSTR: + case PP_IFNUM: + case PP_IFSTR: + if (istk->conds && !emitting(istk->conds->state)) + j = COND_NEVER; + else + { + j = if_condition(tline->next, i); + tline->next = NULL; /* it got freed */ + free_tlist(origline); + j = j < 0 ? COND_NEVER : j ? COND_IF_TRUE : COND_IF_FALSE; + } + cond = nasm_malloc(sizeof(Cond)); + cond->next = istk->conds; + cond->state = j; + istk->conds = cond; + return DIRECTIVE_FOUND; + + case PP_ELIF: + case PP_ELIFCTX: + case PP_ELIFDEF: + case PP_ELIFID: + case PP_ELIFIDN: + case PP_ELIFIDNI: + case PP_ELIFMACRO: + case PP_ELIFNCTX: + case PP_ELIFNDEF: + case PP_ELIFNID: + case PP_ELIFNIDN: + case PP_ELIFNIDNI: + case PP_ELIFNMACRO: + case PP_ELIFNNUM: + case PP_ELIFNSTR: + case PP_ELIFNUM: + case PP_ELIFSTR: + if (!istk->conds) + error(ERR_FATAL, "`%s': no matching `%%if'", directives[i]); + if (emitting(istk->conds->state) + || istk->conds->state == COND_NEVER) + istk->conds->state = COND_NEVER; + else + { + /* + * IMPORTANT: In the case of %if, we will already have + * called expand_mmac_params(); however, if we're + * processing an %elif we must have been in a + * non-emitting mode, which would have inhibited + * the normal invocation of expand_mmac_params(). Therefore, + * we have to do it explicitly here. + */ + j = if_condition(expand_mmac_params(tline->next), i); + tline->next = NULL; /* it got freed */ + free_tlist(origline); + istk->conds->state = + j < 0 ? COND_NEVER : j ? 
COND_IF_TRUE : COND_IF_FALSE; + } + return DIRECTIVE_FOUND; + + case PP_ELSE: + if (tline->next) + error(ERR_WARNING, "trailing garbage after `%%else' ignored"); + if (!istk->conds) + error(ERR_FATAL, "`%%else': no matching `%%if'"); + if (emitting(istk->conds->state) + || istk->conds->state == COND_NEVER) + istk->conds->state = COND_ELSE_FALSE; + else + istk->conds->state = COND_ELSE_TRUE; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_ENDIF: + if (tline->next) + error(ERR_WARNING, + "trailing garbage after `%%endif' ignored"); + if (!istk->conds) + error(ERR_FATAL, "`%%endif': no matching `%%if'"); + cond = istk->conds; + istk->conds = cond->next; + nasm_free(cond); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_MACRO: + case PP_IMACRO: + if (defining) + error(ERR_FATAL, + "`%%%smacro': already defining a macro", + (i == PP_IMACRO ? "i" : "")); + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tok_type_(tline, TOK_ID)) + { + error(ERR_NONFATAL, + "`%%%smacro' expects a macro name", + (i == PP_IMACRO ? "i" : "")); + return DIRECTIVE_FOUND; + } + defining = nasm_malloc(sizeof(MMacro)); + defining->name = nasm_strdup(tline->text); + defining->casesense = (i == PP_MACRO); + defining->plus = FALSE; + defining->nolist = FALSE; + defining->in_progress = FALSE; + defining->rep_nest = NULL; + tline = expand_smacro(tline->next); + skip_white_(tline); + if (!tok_type_(tline, TOK_NUMBER)) + { + error(ERR_NONFATAL, + "`%%%smacro' expects a parameter count", + (i == PP_IMACRO ? 
"i" : "")); + defining->nparam_min = defining->nparam_max = 0; + } + else + { + defining->nparam_min = defining->nparam_max = + readnum(tline->text, &j); + if (j) + error(ERR_NONFATAL, + "unable to parse parameter count `%s'", + tline->text); + } + if (tline && tok_is_(tline->next, "-")) + { + tline = tline->next->next; + if (tok_is_(tline, "*")) + defining->nparam_max = INT_MAX; + else if (!tok_type_(tline, TOK_NUMBER)) + error(ERR_NONFATAL, + "`%%%smacro' expects a parameter count after `-'", + (i == PP_IMACRO ? "i" : "")); + else + { + defining->nparam_max = readnum(tline->text, &j); + if (j) + error(ERR_NONFATAL, + "unable to parse parameter count `%s'", + tline->text); + if (defining->nparam_min > defining->nparam_max) + error(ERR_NONFATAL, + "minimum parameter count exceeds maximum"); + } + } + if (tline && tok_is_(tline->next, "+")) + { + tline = tline->next; + defining->plus = TRUE; + } + if (tline && tok_type_(tline->next, TOK_ID) && + !nasm_stricmp(tline->next->text, ".nolist")) + { + tline = tline->next; + defining->nolist = TRUE; + } + mmac = mmacros[hash(defining->name)]; + while (mmac) + { + if (!strcmp(mmac->name, defining->name) && + (mmac->nparam_min <= defining->nparam_max + || defining->plus) + && (defining->nparam_min <= mmac->nparam_max + || mmac->plus)) + { + error(ERR_WARNING, + "redefining multi-line macro `%s'", + defining->name); + break; + } + mmac = mmac->next; + } + /* + * Handle default parameters. 
+ */ + if (tline && tline->next) + { + defining->dlist = tline->next; + tline->next = NULL; + count_mmac_params(defining->dlist, &defining->ndefs, + &defining->defaults); + } + else + { + defining->dlist = NULL; + defining->defaults = NULL; + } + defining->expansion = NULL; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_ENDM: + case PP_ENDMACRO: + if (!defining) + { + error(ERR_NONFATAL, "`%s': not defining a macro", + tline->text); + return DIRECTIVE_FOUND; + } + k = hash(defining->name); + defining->next = mmacros[k]; + mmacros[k] = defining; + defining = NULL; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_ROTATE: + if (tline->next && tline->next->type == TOK_WHITESPACE) + tline = tline->next; + if (tline->next == NULL) + { + free_tlist(origline); + error(ERR_NONFATAL, "`%%rotate' missing rotate count"); + return DIRECTIVE_FOUND; + } + t = expand_smacro(tline->next); + tline->next = NULL; + free_tlist(origline); + tline = t; + tptr = &t; + tokval.t_type = TOKEN_INVALID; + evalresult = + evaluate(ppscan, tptr, &tokval, NULL, pass, error, NULL); + free_tlist(tline); + if (!evalresult) + return DIRECTIVE_FOUND; + if (tokval.t_type) + error(ERR_WARNING, + "trailing garbage after expression ignored"); + if (!is_simple(evalresult)) + { + error(ERR_NONFATAL, "non-constant value given to `%%rotate'"); + return DIRECTIVE_FOUND; + } + mmac = istk->mstk; + while (mmac && !mmac->name) /* avoid mistaking %reps for macros */ + mmac = mmac->next_active; + if (!mmac) + { + error(ERR_NONFATAL, + "`%%rotate' invoked outside a macro call"); + } + else if (mmac->nparam == 0) + { + error(ERR_NONFATAL, + "`%%rotate' invoked within macro without parameters"); + } + else + { + mmac->rotate = mmac->rotate + reloc_value(evalresult); + + if (mmac->rotate < 0) + mmac->rotate = + mmac->nparam - (-mmac->rotate) % mmac->nparam; + mmac->rotate %= mmac->nparam; + } + return DIRECTIVE_FOUND; + + case PP_REP: + nolist = FALSE; + tline = tline->next; + if (tline->next 
&& tline->next->type == TOK_WHITESPACE) + tline = tline->next; + if (tline->next && tline->next->type == TOK_ID && + !nasm_stricmp(tline->next->text, ".nolist")) + { + tline = tline->next; + nolist = TRUE; + } + t = expand_smacro(tline->next); + tline->next = NULL; + free_tlist(origline); + tline = t; + tptr = &t; + tokval.t_type = TOKEN_INVALID; + evalresult = + evaluate(ppscan, tptr, &tokval, NULL, pass, error, NULL); + free_tlist(tline); + if (!evalresult) + return DIRECTIVE_FOUND; + if (tokval.t_type) + error(ERR_WARNING, + "trailing garbage after expression ignored"); + if (!is_simple(evalresult)) + { + error(ERR_NONFATAL, "non-constant value given to `%%rep'"); + return DIRECTIVE_FOUND; + } + tmp_defining = defining; + defining = nasm_malloc(sizeof(MMacro)); + defining->name = NULL; /* flags this macro as a %rep block */ + defining->casesense = 0; + defining->plus = FALSE; + defining->nolist = nolist; + defining->in_progress = reloc_value(evalresult) + 1; + defining->nparam_min = defining->nparam_max = 0; + defining->defaults = NULL; + defining->dlist = NULL; + defining->expansion = NULL; + defining->next_active = istk->mstk; + defining->rep_nest = tmp_defining; + return DIRECTIVE_FOUND; + + case PP_ENDREP: + if (!defining || defining->name) + { + error(ERR_NONFATAL, "`%%endrep': no matching `%%rep'"); + return DIRECTIVE_FOUND; + } + + /* + * Now we have a "macro" defined - although it has no name + * and we won't be entering it in the hash tables - we must + * push a macro-end marker for it on to istk->expansion. + * After that, it will take care of propagating itself (a + * macro-end marker line for a macro which is really a %rep + * block will cause the macro to be re-expanded, complete + * with another macro-end marker to ensure the process + * continues) until the whole expansion is forcibly removed + * from istk->expansion by a %exitrep. 
+ */ + l = nasm_malloc(sizeof(Line)); + l->next = istk->expansion; + l->finishes = defining; + l->first = NULL; + istk->expansion = l; + + istk->mstk = defining; + + list->uplevel(defining->nolist ? LIST_MACRO_NOLIST : LIST_MACRO); + tmp_defining = defining; + defining = defining->rep_nest; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_EXITREP: + /* + * We must search along istk->expansion until we hit a + * macro-end marker for a macro with no name. Then we set + * its `in_progress' flag to 0. + */ + for (l = istk->expansion; l; l = l->next) + if (l->finishes && !l->finishes->name) + break; + + if (l) + l->finishes->in_progress = 0; + else + error(ERR_NONFATAL, "`%%exitrep' not within `%%rep' block"); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_XDEFINE: + case PP_IXDEFINE: + case PP_DEFINE: + case PP_IDEFINE: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, + "`%%%s%sdefine' expects a macro identifier", + ((i == PP_IDEFINE || i == PP_IXDEFINE) ? "i" : ""), + ((i == PP_XDEFINE || i == PP_IXDEFINE) ? "x" : "")); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + ctx = get_ctx(tline->text, FALSE); + if (!ctx) + smhead = &smacros[hash(tline->text)]; + else + smhead = &ctx->localmac; + mname = tline->text; + last = tline; + param_start = tline = tline->next; + nparam = 0; + + /* Expand the macro definition now for %xdefine and %ixdefine */ + if ((i == PP_XDEFINE) || (i == PP_IXDEFINE)) + tline = expand_smacro(tline); + + if (tok_is_(tline, "(")) + { + /* + * This macro has parameters. 
+ */ + + tline = tline->next; + while (1) + { + skip_white_(tline); + if (!tline) + { + error(ERR_NONFATAL, "parameter identifier expected"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + if (tline->type != TOK_ID) + { + error(ERR_NONFATAL, + "`%s': parameter identifier expected", + tline->text); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + tline->type = TOK_SMAC_PARAM + nparam++; + tline = tline->next; + skip_white_(tline); + if (tok_is_(tline, ",")) + { + tline = tline->next; + continue; + } + if (!tok_is_(tline, ")")) + { + error(ERR_NONFATAL, + "`)' expected to terminate macro template"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + break; + } + last = tline; + tline = tline->next; + } + if (tok_type_(tline, TOK_WHITESPACE)) + last = tline, tline = tline->next; + macro_start = NULL; + last->next = NULL; + t = tline; + while (t) + { + if (t->type == TOK_ID) + { + for (tt = param_start; tt; tt = tt->next) + if (tt->type >= TOK_SMAC_PARAM && + !strcmp(tt->text, t->text)) + t->type = tt->type; + } + tt = t->next; + t->next = macro_start; + macro_start = t; + t = tt; + } + /* + * Good. We now have a macro name, a parameter count, and a + * token list (in reverse order) for an expansion. We ought + * to be OK just to create an SMacro, store it, and let + * free_tlist have the rest of the line (which we have + * carefully re-terminated after chopping off the expansion + * from the end). + */ + if (smacro_defined(ctx, mname, nparam, &smac, i == PP_DEFINE)) + { + if (!smac) + { + error(ERR_WARNING, + "single-line macro `%s' defined both with and" + " without parameters", mname); + free_tlist(origline); + free_tlist(macro_start); + return DIRECTIVE_FOUND; + } + else + { + /* + * We're redefining, so we have to take over an + * existing SMacro structure. This means freeing + * what was already in it. 
+ */ + nasm_free(smac->name); + free_tlist(smac->expansion); + } + } + else + { + smac = nasm_malloc(sizeof(SMacro)); + smac->next = *smhead; + *smhead = smac; + } + smac->name = nasm_strdup(mname); + smac->casesense = ((i == PP_DEFINE) || (i == PP_XDEFINE)); + smac->nparam = nparam; + smac->expansion = macro_start; + smac->in_progress = FALSE; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_UNDEF: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, "`%%undef' expects a macro identifier"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + if (tline->next) + { + error(ERR_WARNING, + "trailing garbage after macro name ignored"); + } + + /* Find the context that symbol belongs to */ + ctx = get_ctx(tline->text, FALSE); + if (!ctx) + smhead = &smacros[hash(tline->text)]; + else + smhead = &ctx->localmac; + + mname = tline->text; + last = tline; + last->next = NULL; + + /* + * We now have a macro name... go hunt for it. 
+ */ + while (smacro_defined(ctx, mname, -1, &smac, 1)) + { + /* Defined, so we need to find its predecessor and nuke it */ + SMacro **s; + for (s = smhead; *s && *s != smac; s = &(*s)->next); + if (*s) + { + *s = smac->next; + nasm_free(smac->name); + free_tlist(smac->expansion); + nasm_free(smac); + } + } + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_STRLEN: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, + "`%%strlen' expects a macro identifier as first parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + ctx = get_ctx(tline->text, FALSE); + if (!ctx) + smhead = &smacros[hash(tline->text)]; + else + smhead = &ctx->localmac; + mname = tline->text; + last = tline; + tline = expand_smacro(tline->next); + last->next = NULL; + + t = tline; + while (tok_type_(t, TOK_WHITESPACE)) + t = t->next; + /* t should now point to the string */ + if (t->type != TOK_STRING) + { + error(ERR_NONFATAL, + "`%%strlen` requires string as second parameter"); + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + macro_start = nasm_malloc(sizeof(*macro_start)); + macro_start->next = NULL; + make_tok_num(macro_start, strlen(t->text) - 2); + macro_start->mac = NULL; + + /* + * We now have a macro name, an implicit parameter count of + * zero, and a numeric token to use as an expansion. Create + * and store an SMacro. + */ + if (smacro_defined(ctx, mname, 0, &smac, i == PP_STRLEN)) + { + if (!smac) + error(ERR_WARNING, + "single-line macro `%s' defined both with and" + " without parameters", mname); + else + { + /* + * We're redefining, so we have to take over an + * existing SMacro structure. This means freeing + * what was already in it. 
+ */ + nasm_free(smac->name); + free_tlist(smac->expansion); + } + } + else + { + smac = nasm_malloc(sizeof(SMacro)); + smac->next = *smhead; + *smhead = smac; + } + smac->name = nasm_strdup(mname); + smac->casesense = (i == PP_STRLEN); + smac->nparam = 0; + smac->expansion = macro_start; + smac->in_progress = FALSE; + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_SUBSTR: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, + "`%%substr' expects a macro identifier as first parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + ctx = get_ctx(tline->text, FALSE); + if (!ctx) + smhead = &smacros[hash(tline->text)]; + else + smhead = &ctx->localmac; + mname = tline->text; + last = tline; + tline = expand_smacro(tline->next); + last->next = NULL; + + t = tline->next; + while (tok_type_(t, TOK_WHITESPACE)) + t = t->next; + + /* t should now point to the string */ + if (t->type != TOK_STRING) + { + error(ERR_NONFATAL, + "`%%substr` requires string as second parameter"); + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + tt = t->next; + tptr = &tt; + tokval.t_type = TOKEN_INVALID; + evalresult = + evaluate(ppscan, tptr, &tokval, NULL, pass, error, NULL); + if (!evalresult) + { + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + if (!is_simple(evalresult)) + { + error(ERR_NONFATAL, "non-constant value given to `%%substr`"); + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + macro_start = nasm_malloc(sizeof(*macro_start)); + macro_start->next = NULL; + macro_start->text = nasm_strdup("'''"); + if (evalresult->value > 0 + && evalresult->value < strlen(t->text) - 1) + { + macro_start->text[1] = t->text[evalresult->value]; + } + else + { + macro_start->text[2] = '\0'; + } + macro_start->type = 
TOK_STRING; + macro_start->mac = NULL; + + /* + * We now have a macro name, an implicit parameter count of + * zero, and a numeric token to use as an expansion. Create + * and store an SMacro. + */ + if (smacro_defined(ctx, mname, 0, &smac, i == PP_SUBSTR)) + { + if (!smac) + error(ERR_WARNING, + "single-line macro `%s' defined both with and" + " without parameters", mname); + else + { + /* + * We're redefining, so we have to take over an + * existing SMacro structure. This means freeing + * what was already in it. + */ + nasm_free(smac->name); + free_tlist(smac->expansion); + } + } + else + { + smac = nasm_malloc(sizeof(SMacro)); + smac->next = *smhead; + *smhead = smac; + } + smac->name = nasm_strdup(mname); + smac->casesense = (i == PP_SUBSTR); + smac->nparam = 0; + smac->expansion = macro_start; + smac->in_progress = FALSE; + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + + + case PP_ASSIGN: + case PP_IASSIGN: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, + "`%%%sassign' expects a macro identifier", + (i == PP_IASSIGN ? "i" : "")); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + ctx = get_ctx(tline->text, FALSE); + if (!ctx) + smhead = &smacros[hash(tline->text)]; + else + smhead = &ctx->localmac; + mname = tline->text; + last = tline; + tline = expand_smacro(tline->next); + last->next = NULL; + + t = tline; + tptr = &t; + tokval.t_type = TOKEN_INVALID; + evalresult = + evaluate(ppscan, tptr, &tokval, NULL, pass, error, NULL); + free_tlist(tline); + if (!evalresult) + { + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + if (tokval.t_type) + error(ERR_WARNING, + "trailing garbage after expression ignored"); + + if (!is_simple(evalresult)) + { + error(ERR_NONFATAL, + "non-constant value given to `%%%sassign'", + (i == PP_IASSIGN ? 
"i" : "")); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + macro_start = nasm_malloc(sizeof(*macro_start)); + macro_start->next = NULL; + make_tok_num(macro_start, reloc_value(evalresult)); + macro_start->mac = NULL; + + /* + * We now have a macro name, an implicit parameter count of + * zero, and a numeric token to use as an expansion. Create + * and store an SMacro. + */ + if (smacro_defined(ctx, mname, 0, &smac, i == PP_ASSIGN)) + { + if (!smac) + error(ERR_WARNING, + "single-line macro `%s' defined both with and" + " without parameters", mname); + else + { + /* + * We're redefining, so we have to take over an + * existing SMacro structure. This means freeing + * what was already in it. + */ + nasm_free(smac->name); + free_tlist(smac->expansion); + } + } + else + { + smac = nasm_malloc(sizeof(SMacro)); + smac->next = *smhead; + *smhead = smac; + } + smac->name = nasm_strdup(mname); + smac->casesense = (i == PP_ASSIGN); + smac->nparam = 0; + smac->expansion = macro_start; + smac->in_progress = FALSE; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_LINE: + /* + * Syntax is `%line nnn[+mmm] [filename]' + */ + tline = tline->next; + skip_white_(tline); + if (!tok_type_(tline, TOK_NUMBER)) + { + error(ERR_NONFATAL, "`%%line' expects line number"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + k = readnum(tline->text, &j); + m = 1; + tline = tline->next; + if (tok_is_(tline, "+")) + { + tline = tline->next; + if (!tok_type_(tline, TOK_NUMBER)) + { + error(ERR_NONFATAL, "`%%line' expects line increment"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + m = readnum(tline->text, &j); + tline = tline->next; + } + skip_white_(tline); + src_set_linnum(k); + istk->lineinc = m; + if (tline) + { + nasm_free(src_set_fname(detoken(tline, FALSE))); + } + free_tlist(origline); + return DIRECTIVE_FOUND; + + default: + error(ERR_FATAL, + "preprocessor directive `%s' not yet implemented", + directives[i]); + break; + } + return 
DIRECTIVE_FOUND; +} + +/* + * Ensure that a macro parameter contains a condition code and + * nothing else. Return the condition code index if so, or -1 + * otherwise. + */ +static int +find_cc(Token * t) +{ + Token *tt; + int i, j, k, m; + + skip_white_(t); + if (t->type != TOK_ID) + return -1; + tt = t->next; + skip_white_(tt); + if (tt && (tt->type != TOK_OTHER || strcmp(tt->text, ","))) + return -1; + + i = -1; + j = elements(conditions); + while (j - i > 1) + { + k = (j + i) / 2; + m = nasm_stricmp(t->text, conditions[k]); + if (m == 0) + { + i = k; + j = -2; + break; + } + else if (m < 0) + { + j = k; + } + else + i = k; + } + if (j != -2) + return -1; + return i; +} + +/* + * Expand MMacro-local things: parameter references (%0, %n, %+n, + * %-n) and MMacro-local identifiers (%%foo). + */ +static Token * +expand_mmac_params(Token * tline) +{ + Token *t, *tt, **tail, *thead; + + tail = &thead; + thead = NULL; + + while (tline) + { + if (tline->type == TOK_PREPROC_ID && + (((tline->text[1] == '+' || tline->text[1] == '-') + && tline->text[2]) || tline->text[1] == '%' + || (tline->text[1] >= '0' && tline->text[1] <= '9'))) + { + char *text = NULL; + int type = 0, cc; /* type = 0 to placate optimisers */ + char tmpbuf[30]; + int n, i; + MMacro *mac; + + t = tline; + tline = tline->next; + + mac = istk->mstk; + while (mac && !mac->name) /* avoid mistaking %reps for macros */ + mac = mac->next_active; + if (!mac) + error(ERR_NONFATAL, "`%s': not in a macro call", t->text); + else + switch (t->text[1]) + { + /* + * We have to make a substitution of one of the + * forms %1, %-1, %+1, %%foo, %0. 
+ */ + case '0': + type = TOK_NUMBER; + sprintf(tmpbuf, "%d", mac->nparam); + text = nasm_strdup(tmpbuf); + break; + case '%': + type = TOK_ID; + sprintf(tmpbuf, "..@%lu.", mac->unique); + text = nasm_strcat(tmpbuf, t->text + 2); + break; + case '-': + n = atoi(t->text + 2) - 1; + if (n >= mac->nparam) + tt = NULL; + else + { + if (mac->nparam > 1) + n = (n + mac->rotate) % mac->nparam; + tt = mac->params[n]; + } + cc = find_cc(tt); + if (cc == -1) + { + error(ERR_NONFATAL, + "macro parameter %d is not a condition code", + n + 1); + text = NULL; + } + else + { + type = TOK_ID; + if (inverse_ccs[cc] == -1) + { + error(ERR_NONFATAL, + "condition code `%s' is not invertible", + conditions[cc]); + text = NULL; + } + else + text = + nasm_strdup(conditions[inverse_ccs + [cc]]); + } + break; + case '+': + n = atoi(t->text + 2) - 1; + if (n >= mac->nparam) + tt = NULL; + else + { + if (mac->nparam > 1) + n = (n + mac->rotate) % mac->nparam; + tt = mac->params[n]; + } + cc = find_cc(tt); + if (cc == -1) + { + error(ERR_NONFATAL, + "macro parameter %d is not a condition code", + n + 1); + text = NULL; + } + else + { + type = TOK_ID; + text = nasm_strdup(conditions[cc]); + } + break; + default: + n = atoi(t->text + 1) - 1; + if (n >= mac->nparam) + tt = NULL; + else + { + if (mac->nparam > 1) + n = (n + mac->rotate) % mac->nparam; + tt = mac->params[n]; + } + if (tt) + { + for (i = 0; i < mac->paramlen[n]; i++) + { + *tail = + new_Token(NULL, tt->type, tt->text, + 0); + tail = &(*tail)->next; + tt = tt->next; + } + } + text = NULL; /* we've done it here */ + break; + } + if (!text) + { + delete_Token(t); + } + else + { + *tail = t; + tail = &t->next; + t->type = type; + nasm_free(t->text); + t->text = text; + t->mac = NULL; + } + continue; + } + else + { + t = *tail = tline; + tline = tline->next; + t->mac = NULL; + tail = &t->next; + } + } + *tail = NULL; + t = thead; + for (; t && (tt = t->next) != NULL; t = t->next) + switch (t->type) + { + case TOK_WHITESPACE: + if 
(tt->type == TOK_WHITESPACE) + { + t->next = delete_Token(tt); + } + break; + case TOK_ID: + if (tt->type == TOK_ID || tt->type == TOK_NUMBER) + { + char *tmp = nasm_strcat(t->text, tt->text); + nasm_free(t->text); + t->text = tmp; + t->next = delete_Token(tt); + } + break; + case TOK_NUMBER: + if (tt->type == TOK_NUMBER) + { + char *tmp = nasm_strcat(t->text, tt->text); + nasm_free(t->text); + t->text = tmp; + t->next = delete_Token(tt); + } + break; + } + + return thead; +} + +/* + * Expand all single-line macro calls made in the given line. + * Return the expanded version of the line. The original is deemed + * to be destroyed in the process. (In reality we'll just move + * Tokens from input to output a lot of the time, rather than + * actually bothering to destroy and replicate.) + */ +static Token * +expand_smacro(Token * tline) +{ + Token *t, *tt, *mstart, **tail, *thead; + SMacro *head = NULL, *m; + Token **params; + int *paramsize; + int nparam, sparam, brackets, rescan; + Token *org_tline = tline; + Context *ctx; + char *mname; + + /* + * Trick: we should avoid changing the start token pointer since it can + * be contained in "next" field of other token. Because of this + * we allocate a copy of first token and work with it; at the end of + * routine we copy it back + */ + if (org_tline) + { + tline = + new_Token(org_tline->next, org_tline->type, org_tline->text, + 0); + tline->mac = org_tline->mac; + nasm_free(org_tline->text); + org_tline->text = NULL; + } + + again: + tail = &thead; + thead = NULL; + + while (tline) + { /* main token loop */ + if ((mname = tline->text)) + { + /* if this token is a local macro, look in local context */ + if (tline->type == TOK_ID || tline->type == TOK_PREPROC_ID) + ctx = get_ctx(mname, TRUE); + else + ctx = NULL; + if (!ctx) + head = smacros[hash(mname)]; + else + head = ctx->localmac; + /* + * We've hit an identifier. 
As in is_mmacro below, we first + * check whether the identifier is a single-line macro at + * all, then think about checking for parameters if + * necessary. + */ + for (m = head; m; m = m->next) + if (!mstrcmp(m->name, mname, m->casesense)) + break; + if (m) + { + mstart = tline; + params = NULL; + paramsize = NULL; + if (m->nparam == 0) + { + /* + * Simple case: the macro is parameterless. Discard the + * one token that the macro call took, and push the + * expansion back on the to-do stack. + */ + if (!m->expansion) + { + if (!strcmp("__FILE__", m->name)) + { + long num = 0; + src_get(&num, &(tline->text)); + nasm_quote(&(tline->text)); + tline->type = TOK_STRING; + continue; + } + if (!strcmp("__LINE__", m->name)) + { + nasm_free(tline->text); + make_tok_num(tline, src_get_linnum()); + continue; + } + tline = delete_Token(tline); + continue; + } + } + else + { + /* + * Complicated case: at least one macro with this name + * exists and takes parameters. We must find the + * parameters in the call, count them, find the SMacro + * that corresponds to that form of the macro call, and + * substitute for the parameters when we expand. What a + * pain. + */ + tline = tline->next; + skip_white_(tline); + if (!tok_is_(tline, "(")) + { + /* + * This macro wasn't called with parameters: ignore + * the call. (Behaviour borrowed from gnu cpp.) 
+ */ + tline = mstart; + m = NULL; + } + else + { + int paren = 0; + int white = 0; + brackets = 0; + nparam = 0; + tline = tline->next; + sparam = PARAM_DELTA; + params = nasm_malloc(sparam * sizeof(Token *)); + params[0] = tline; + paramsize = nasm_malloc(sparam * sizeof(int)); + paramsize[0] = 0; + for (;; tline = tline->next) + { /* parameter loop */ + if (!tline) + { + error(ERR_NONFATAL, + "macro call expects terminating `)'"); + break; + } + if (tline->type == TOK_WHITESPACE + && brackets <= 0) + { + if (paramsize[nparam]) + white++; + else + params[nparam] = tline->next; + continue; /* parameter loop */ + } + if (tline->type == TOK_OTHER + && tline->text[1] == 0) + { + char ch = tline->text[0]; + if (ch == ',' && !paren && brackets <= 0) + { + if (++nparam >= sparam) + { + sparam += PARAM_DELTA; + params = nasm_realloc(params, + sparam * sizeof(Token *)); + paramsize = nasm_realloc(paramsize, + sparam * sizeof(int)); + } + params[nparam] = tline->next; + paramsize[nparam] = 0; + white = 0; + continue; /* parameter loop */ + } + if (ch == '{' && + (brackets > 0 || (brackets == 0 && + !paramsize[nparam]))) + { + if (!(brackets++)) + { + params[nparam] = tline->next; + continue; /* parameter loop */ + } + } + if (ch == '}' && brackets > 0) + if (--brackets == 0) + { + brackets = -1; + continue; /* parameter loop */ + } + if (ch == '(' && !brackets) + paren++; + if (ch == ')' && brackets <= 0) + if (--paren < 0) + break; + } + if (brackets < 0) + { + brackets = 0; + error(ERR_NONFATAL, "braces do not " + "enclose all of macro parameter"); + } + paramsize[nparam] += white + 1; + white = 0; + } /* parameter loop */ + nparam++; + while (m && (m->nparam != nparam || + mstrcmp(m->name, mname, + m->casesense))) + m = m->next; + if (!m) + error(ERR_WARNING | ERR_WARN_MNP, + "macro `%s' exists, " + "but not taking %d parameters", + mstart->text, nparam); + } + } + if (m && m->in_progress) + m = NULL; + if (!m) /* in progess or didn't find '(' or wrong nparam */ + { + 
/* + * Design question: should we handle !tline, which + * indicates missing ')' here, or expand those + * macros anyway, which requires the (t) test a few + * lines down? + */ + nasm_free(params); + nasm_free(paramsize); + tline = mstart; + } + else + { + /* + * Expand the macro: we are placed on the last token of the + * call, so that we can easily split the call from the + * following tokens. We also start by pushing an SMAC_END + * token for the cycle removal. + */ + t = tline; + if (t) + { + tline = t->next; + t->next = NULL; + } + tt = new_Token(tline, TOK_SMAC_END, NULL, 0); + tt->mac = m; + m->in_progress = TRUE; + tline = tt; + for (t = m->expansion; t; t = t->next) + { + if (t->type >= TOK_SMAC_PARAM) + { + Token *pcopy = tline, **ptail = &pcopy; + Token *ttt, *pt; + int i; + + ttt = params[t->type - TOK_SMAC_PARAM]; + for (i = paramsize[t->type - TOK_SMAC_PARAM]; + --i >= 0;) + { + pt = *ptail = + new_Token(tline, ttt->type, ttt->text, + 0); + ptail = &pt->next; + ttt = ttt->next; + } + tline = pcopy; + } + else + { + tt = new_Token(tline, t->type, t->text, 0); + tline = tt; + } + } + + /* + * Having done that, get rid of the macro call, and clean + * up the parameters. + */ + nasm_free(params); + nasm_free(paramsize); + free_tlist(mstart); + continue; /* main token loop */ + } + } + } + + if (tline->type == TOK_SMAC_END) + { + tline->mac->in_progress = FALSE; + tline = delete_Token(tline); + } + else + { + t = *tail = tline; + tline = tline->next; + t->mac = NULL; + t->next = NULL; + tail = &t->next; + } + } + + /* + * Now scan the entire line and look for successive TOK_IDs that resulted + * after expansion (they can't be produced by tokenise()). The successive + * TOK_IDs should be concatenated. + * Also we look for %+ tokens and concatenate the tokens before and after + * them (without white spaces in between). 
+ */ + t = thead; + rescan = 0; + while (t) + { + while (t && t->type != TOK_ID && t->type != TOK_PREPROC_ID) + t = t->next; + if (!t || !t->next) + break; + if (t->next->type == TOK_ID || + t->next->type == TOK_PREPROC_ID || + t->next->type == TOK_NUMBER) + { + char *p = nasm_strcat(t->text, t->next->text); + nasm_free(t->text); + t->next = delete_Token(t->next); + t->text = p; + rescan = 1; + } + else if (t->next->type == TOK_WHITESPACE && t->next->next && + t->next->next->type == TOK_PREPROC_ID && + strcmp(t->next->next->text, "%+") == 0) + { + /* free the next whitespace, the %+ token and next whitespace */ + int i; + for (i = 1; i <= 3; i++) + { + if (!t->next || (i != 2 && t->next->type != TOK_WHITESPACE)) + break; + t->next = delete_Token(t->next); + } /* endfor */ + } + else + t = t->next; + } + /* If we concatenaded something, re-scan the line for macros */ + if (rescan) + { + tline = thead; + goto again; + } + + if (org_tline) + { + if (thead) + { + *org_tline = *thead; + /* since we just gave text to org_line, don't free it */ + thead->text = NULL; + delete_Token(thead); + } + else + { + /* the expression expanded to empty line; + we can't return NULL for some reasons + we just set the line to a single WHITESPACE token. */ + memset(org_tline, 0, sizeof(*org_tline)); + org_tline->text = NULL; + org_tline->type = TOK_WHITESPACE; + } + thead = org_tline; + } + + return thead; +} + +/* + * Similar to expand_smacro but used exclusively with macro identifiers + * right before they are fetched in. The reason is that there can be + * identifiers consisting of several subparts. We consider that if there + * are more than one element forming the name, user wants a expansion, + * otherwise it will be left as-is. Example: + * + * %define %$abc cde + * + * the identifier %$abc will be left as-is so that the handler for %define + * will suck it and define the corresponding value. 
Other case:
+ *
+ *      %define _%$abc cde
+ *
+ * In this case user wants name to be expanded *before* %define starts
+ * working, so we'll expand %$abc into something (if it has a value;
+ * otherwise it will be left as-is) then concatenate all successive
+ * PP_IDs into one.
+ */
+static Token *
+expand_id(Token * tline)
+{
+    Token *cur, *oldnext;
+
+    if (!tline || !tline->next)
+        return tline;
+
+    /* Walk to the last token of the leading ID / PREPROC_ID / NUMBER run */
+    cur = tline;
+    while (cur->next &&
+           (cur->next->type == TOK_ID ||
+            cur->next->type == TOK_PREPROC_ID || cur->next->type == TOK_NUMBER))
+        cur = cur->next;
+
+    /* If identifier consists of just one token, don't expand */
+    if (cur == tline)
+        return tline;
+
+    /*
+     * cur cannot be NULL here: it started at the non-NULL tline and
+     * was only ever advanced to a non-NULL ->next.
+     */
+    oldnext = cur->next;        /* Detach the tail past identifier */
+    cur->next = NULL;           /* so that expand_smacro stops here */
+
+    tline = expand_smacro(tline);
+
+    /* expand_smacro possibly changed tline; re-scan for EOL */
+    cur = tline;
+    while (cur && cur->next)
+        cur = cur->next;
+    if (cur)
+        cur->next = oldnext;    /* re-attach the detached tail */
+
+    return tline;
+}
+
+/*
+ * Determine whether the given line constitutes a multi-line macro
+ * call, and return the MMacro structure called if so. Doesn't have
+ * to check for an initial label - that's taken care of in
+ * expand_mmacro - but must check numbers of parameters. Guaranteed
+ * to be called with tline->type == TOK_ID, so the putative macro
+ * name is easy to find.
+ *
+ * On success *params_array receives the NULL-terminated parameter
+ * array (call parameters followed by any defaults that were needed).
+ * On every failure path NULL is returned, *params_array is left
+ * untouched and the array obtained from count_mmac_params() is freed.
+ */
+static MMacro *
+is_mmacro(Token * tline, Token *** params_array)
+{
+    MMacro *head, *m;
+    Token **params;
+    int nparam;
+
+    head = mmacros[hash(tline->text)];
+
+    /*
+     * Efficiency: first we see if any macro exists with the given
+     * name. If not, we can return NULL immediately. _Then_ we
+     * count the parameters, and then we look further along the
+     * list if necessary to find the proper MMacro.
+     */
+    for (m = head; m; m = m->next)
+        if (!mstrcmp(m->name, tline->text, m->casesense))
+            break;
+    if (!m)
+        return NULL;
+
+    /*
+     * OK, we have a potential macro. Count and demarcate the
+     * parameters.
+     */
+    count_mmac_params(tline->next, &nparam, &params);
+
+    /*
+     * So we know how many parameters we've got. Find the MMacro
+     * structure that handles this number.
+     */
+    while (m)
+    {
+        if (m->nparam_min <= nparam && (m->plus || nparam <= m->nparam_max))
+        {
+            /*
+             * This one is right. Just check if cycle removal
+             * prohibits us using it before we actually celebrate...
+             */
+            if (m->in_progress)
+            {
+#if 0
+                error(ERR_NONFATAL,
+                      "self-reference in multi-line macro `%s'", m->name);
+#endif
+                nasm_free(params);
+                return NULL;
+            }
+            /*
+             * It's right, and we can use it. Add its default
+             * parameters to the end of our list if necessary.
+             */
+            if (m->defaults && nparam < m->nparam_min + m->ndefs)
+            {
+                params =
+                    nasm_realloc(params,
+                                 ((m->nparam_min + m->ndefs + 1) * sizeof(*params)));
+                while (nparam < m->nparam_min + m->ndefs)
+                {
+                    params[nparam] = m->defaults[nparam - m->nparam_min];
+                    nparam++;
+                }
+            }
+            /*
+             * If we've gone over the maximum parameter count (and
+             * we're in Plus mode), ignore parameters beyond
+             * nparam_max.
+             */
+            if (m->plus && nparam > m->nparam_max)
+                nparam = m->nparam_max;
+            /*
+             * Then terminate the parameter list, and leave.
+             */
+            if (!params)
+            {                   /* need this special case */
+                params = nasm_malloc(sizeof(*params));
+                nparam = 0;
+            }
+            params[nparam] = NULL;
+            *params_array = params;
+            return m;
+        }
+        /*
+         * This one wasn't right: look for the next one with the
+         * same name.
+         */
+        for (m = m->next; m; m = m->next)
+            if (!mstrcmp(m->name, tline->text, m->casesense))
+                break;
+    }
+
+    /*
+     * After all that, we didn't find one with the right number of
+     * parameters. Issue a warning, and fail to expand the macro.
+     */
+    error(ERR_WARNING | ERR_WARN_MNP,
+          "macro `%s' exists, but not taking %d parameters",
+          tline->text, nparam);
+    nasm_free(params);
+    return NULL;
+}
+
+/*
+ * Expand the multi-line macro call made by the given line, if
+ * there is one to be expanded.
If there is, push the expansion on
+ * istk->expansion and return 1. Otherwise return 0.
+ *
+ * The line may start with a label (optionally followed by ":")
+ * before the macro name; in that case the label part is split off
+ * and later either pushed back as the first expansion line or, when
+ * the macro body references %00, substituted there and freed.
+ */
+static int
+expand_mmacro(Token * tline)
+{
+    Token *startline = tline;
+    Token *label = NULL;
+    int dont_prepend = 0;
+    Token **params, *t, *tt;
+    MMacro *m;
+    Line *l, *ll;
+    int i, nparam, *paramlen;
+
+    t = tline;
+    skip_white_(t);
+/*    if (!tok_type_(t, TOK_ID))  Lino 02/25/02 */
+    if (!tok_type_(t, TOK_ID) && !tok_type_(t, TOK_PREPROC_ID))
+        return 0;
+    m = is_mmacro(t, &params);
+    if (!m)
+    {
+        Token *last;
+        /*
+         * We have an id which isn't a macro call. We'll assume
+         * it might be a label; we'll also check to see if a
+         * colon follows it. Then, if there's another id after
+         * that lot, we'll check it again for macro-hood.
+         */
+        label = last = t;
+        t = t->next;
+        if (tok_type_(t, TOK_WHITESPACE))
+            last = t, t = t->next;
+        if (tok_is_(t, ":"))
+        {
+            dont_prepend = 1;
+            last = t, t = t->next;
+            if (tok_type_(t, TOK_WHITESPACE))
+                last = t, t = t->next;
+        }
+        if (!tok_type_(t, TOK_ID) || (m = is_mmacro(t, &params)) == NULL)
+            return 0;
+        last->next = NULL;
+        tline = t;
+    }
+
+    /*
+     * Fix up the parameters: this involves stripping leading and
+     * trailing whitespace, then stripping braces if they are
+     * present.
+     */
+    for (nparam = 0; params[nparam]; nparam++)
+        ;
+    paramlen = nparam ? nasm_malloc(nparam * sizeof(*paramlen)) : NULL;
+
+    for (i = 0; params[i]; i++)
+    {
+        int brace = FALSE;
+        int comma = (!m->plus || i < nparam - 1);
+
+        t = params[i];
+        skip_white_(t);
+        if (tok_is_(t, "{"))
+            t = t->next, brace = TRUE, comma = FALSE;
+        params[i] = t;
+        paramlen[i] = 0;
+        while (t)
+        {
+            if (comma && t->type == TOK_OTHER && !strcmp(t->text, ","))
+                break;          /* ... because we have hit a comma */
+            if (comma && t->type == TOK_WHITESPACE && tok_is_(t->next, ","))
+                break;          /* ... or a space then a comma */
+            if (brace && t->type == TOK_OTHER && !strcmp(t->text, "}"))
+                break;          /* ... or a brace */
+            t = t->next;
+            paramlen[i]++;
+        }
+    }
+
+    /*
+     * OK, we have a MMacro structure together with a set of
+     * parameters. We must now go through the expansion and push
+     * copies of each Line on to istk->expansion. Substitution of
+     * parameter tokens and macro-local tokens doesn't get done
+     * until the single-line macro substitution process; this is
+     * because delaying them allows us to change the semantics
+     * later through %rotate.
+     *
+     * First, push an end marker on to istk->expansion, mark this
+     * macro as in progress, and set up its invocation-specific
+     * variables.
+     */
+    ll = nasm_malloc(sizeof(Line));
+    ll->next = istk->expansion;
+    ll->finishes = m;
+    ll->first = NULL;
+    istk->expansion = ll;
+
+    m->in_progress = TRUE;
+    m->params = params;
+    m->iline = tline;
+    m->nparam = nparam;
+    m->rotate = 0;
+    m->paramlen = paramlen;
+    m->unique = unique++;
+    m->lineno = 0;
+
+    m->next_active = istk->mstk;
+    istk->mstk = m;
+
+    for (l = m->expansion; l; l = l->next)
+    {
+        Token **tail;
+
+        ll = nasm_malloc(sizeof(Line));
+        ll->finishes = NULL;
+        ll->next = istk->expansion;
+        istk->expansion = ll;
+        tail = &ll->first;
+
+        for (t = l->first; t; t = t->next)
+        {
+            Token *x = t;
+            /* %00 in the body stands for the label on the call line */
+            if (t->type == TOK_PREPROC_ID &&
+                t->text[1] == '0' && t->text[2] == '0')
+            {
+                dont_prepend = -1;
+                x = label;
+                if (!x)
+                    continue;
+            }
+            tt = *tail = new_Token(NULL, x->type, x->text, 0);
+            tail = &tt->next;
+        }
+        *tail = NULL;
+    }
+
+    /*
+     * If we had a label, push it on as the first line of
+     * the macro expansion.
+     */
+    if (label)
+    {
+        if (dont_prepend < 0)
+            free_tlist(startline);
+        else
+        {
+            ll = nasm_malloc(sizeof(Line));
+            ll->finishes = NULL;
+            ll->next = istk->expansion;
+            istk->expansion = ll;
+            ll->first = startline;
+            if (!dont_prepend)
+            {
+                while (label->next)
+                    label = label->next;
+                label->next = tt = new_Token(NULL, TOK_OTHER, ":", 0);
+            }
+        }
+    }
+
+    list->uplevel(m->nolist ? LIST_MACRO_NOLIST : LIST_MACRO);
+
+    return 1;
+}
+
+/*
+ * Preprocessor-local error reporter: formats the message and hands
+ * it to the installed _error callback.
+ *
+ * Since the user won't want to see the same error twice
+ * (preprocessing is done once per pass), ERR_PASS1 is always ORed
+ * into the severity so errors are shown only during pass one.
+ * NOTE(review): the original comment also said ERR_OFFBY1 should
+ * always be used, but nothing here sets such a flag.
+ *
+ * Messages raised while inside a non-emitting conditional branch are
+ * dropped.  When a named macro is being expanded the message is
+ * prefixed with "(name:lineno)".
+ */
+static void
+error(int severity, const char *fmt, ...)
+{
+    va_list arg;
+    char buff[1024];
+
+    /* If we're in a dead branch of IF or something like it, ignore the error */
+    if (istk && istk->conds && !emitting(istk->conds->state))
+        return;
+
+    /*
+     * Messages embed user-controlled text (token and macro names), so
+     * bound the write: an over-long message is truncated instead of
+     * overrunning buff.
+     */
+    va_start(arg, fmt);
+    vsnprintf(buff, sizeof(buff), fmt, arg);
+    va_end(arg);
+
+    if (istk && istk->mstk && istk->mstk->name)
+        _error(severity | ERR_PASS1, "(%s:%d) %s", istk->mstk->name,
+               istk->mstk->lineno, buff);
+    else
+        _error(severity | ERR_PASS1, "%s", buff);
+}
+
+/*
+ * Reset the preprocessor for a new pass over `file': install the
+ * error, evaluator and listing callbacks, create the bottom Include
+ * stack entry for the input file, and clear the macro hash tables,
+ * the `defining' pointer and the `unique' counter.
+ */
+static void
+pp_reset(char *file, int apass, efunc errfunc, evalfunc eval,
+         ListGen * listgen)
+{
+    int h;
+
+    _error = errfunc;
+    cstk = NULL;
+    istk = nasm_malloc(sizeof(Include));
+    istk->next = NULL;
+    istk->conds = NULL;
+    istk->expansion = NULL;
+    istk->mstk = NULL;
+    istk->fp = fopen(file, "r");
+    istk->fname = NULL;
+    src_set_fname(nasm_strdup(file));
+    src_set_linnum(0);
+    istk->lineinc = 1;
+    if (!istk->fp)
+        error(ERR_FATAL | ERR_NOFILE, "unable to open input file `%s'", file);
+    defining = NULL;
+    for (h = 0; h < NHASH; h++)
+    {
+        mmacros[h] = NULL;
+        smacros[h] = NULL;
+    }
+    unique = 0;
+    if (tasm_compatible_mode) {
+        stdmacpos = stdmac;
+    } else {
+        stdmacpos = &stdmac[TASM_MACRO_COUNT];
+    }
+    any_extrastdmac = (extrastdmac != NULL);
+    list = listgen;
+    evaluate = eval;
+    pass = apass;
+}
+
+static char *
+pp_getline(void)
+{
+    char *line;
+    Token *tline;
+
+    while (1)
+    {
+        /*
+         * Fetch a tokenised line, either from the macro-expansion
+         * buffer or from the input file.
+ */ + tline = NULL; + while (istk->expansion && istk->expansion->finishes) + { + Line *l = istk->expansion; + if (!l->finishes->name && l->finishes->in_progress > 1) + { + Line *ll; + + /* + * This is a macro-end marker for a macro with no + * name, which means it's not really a macro at all + * but a %rep block, and the `in_progress' field is + * more than 1, meaning that we still need to + * repeat. (1 means the natural last repetition; 0 + * means termination by %exitrep.) We have + * therefore expanded up to the %endrep, and must + * push the whole block on to the expansion buffer + * again. We don't bother to remove the macro-end + * marker: we'd only have to generate another one + * if we did. + */ + l->finishes->in_progress--; + for (l = l->finishes->expansion; l; l = l->next) + { + Token *t, *tt, **tail; + + ll = nasm_malloc(sizeof(Line)); + ll->next = istk->expansion; + ll->finishes = NULL; + ll->first = NULL; + tail = &ll->first; + + for (t = l->first; t; t = t->next) + { + if (t->text || t->type == TOK_WHITESPACE) + { + tt = *tail = new_Token(NULL, t->type, t->text, 0); + tail = &tt->next; + } + } + + istk->expansion = ll; + } + } + else + { + /* + * Check whether a `%rep' was started and not ended + * within this macro expansion. This can happen and + * should be detected. It's a fatal error because + * I'm too confused to work out how to recover + * sensibly from it. + */ + if (defining) + { + if (defining->name) + error(ERR_PANIC, "defining with name in expansion"); + else if (istk->mstk->name) + error(ERR_FATAL, "`%%rep' without `%%endrep' within" + " expansion of macro `%s'", istk->mstk->name); + } + + /* + * FIXME: investigate the relationship at this point between + * istk->mstk and l->finishes + */ + { + MMacro *m = istk->mstk; + istk->mstk = m->next_active; + if (m->name) + { + /* + * This was a real macro call, not a %rep, and + * therefore the parameter information needs to + * be freed. 
+ */ + nasm_free(m->params); + free_tlist(m->iline); + nasm_free(m->paramlen); + l->finishes->in_progress = FALSE; + } + else + free_mmacro(m); + } + istk->expansion = l->next; + nasm_free(l); + list->downlevel(LIST_MACRO); + } + } + while (1) + { /* until we get a line we can use */ + + if (istk->expansion) + { /* from a macro expansion */ + char *p; + Line *l = istk->expansion; + if (istk->mstk) + istk->mstk->lineno++; + tline = l->first; + istk->expansion = l->next; + nasm_free(l); + p = detoken(tline, FALSE); + list->line(LIST_MACRO, p); + nasm_free(p); + break; + } + line = read_line(); + if (line) + { /* from the current input file */ + line = prepreproc(line); + tline = tokenise(line); + nasm_free(line); + break; + } + /* + * The current file has ended; work down the istk + */ + { + Include *i = istk; + fclose(i->fp); + if (i->conds) + error(ERR_FATAL, "expected `%%endif' before end of file"); + /* only set line and file name if there's a next node */ + if (i->next) + { + src_set_linnum(i->lineno); + nasm_free(src_set_fname(i->fname)); + } + istk = i->next; + list->downlevel(LIST_INCLUDE); + nasm_free(i); + if (!istk) + return NULL; + } + } + + /* + * We must expand MMacro parameters and MMacro-local labels + * _before_ we plunge into directive processing, to cope + * with things like `%define something %1' such as STRUC + * uses. Unless we're _defining_ a MMacro, in which case + * those tokens should be left alone to go into the + * definition; and unless we're in a non-emitting + * condition, in which case we don't want to meddle with + * anything. + */ + if (!defining && !(istk->conds && !emitting(istk->conds->state))) + tline = expand_mmac_params(tline); + + /* + * Check the line to see if it's a preprocessor directive. + */ + if (do_directive(tline) == DIRECTIVE_FOUND) + { + continue; + } + else if (defining) + { + /* + * We're defining a multi-line macro. We emit nothing + * at all, and just + * shove the tokenised line on to the macro definition. 
+             */
+            Line *l = nasm_malloc(sizeof(Line));
+            l->next = defining->expansion;
+            l->first = tline;
+            l->finishes = FALSE;
+            defining->expansion = l;
+            continue;
+        }
+        else if (istk->conds && !emitting(istk->conds->state))
+        {
+            /*
+             * We're in a non-emitting branch of a condition block.
+             * Emit nothing at all, not even a blank line: when we
+             * emerge from the condition we'll give a line-number
+             * directive so we keep our place correctly.
+             */
+            free_tlist(tline);
+            continue;
+        }
+        else if (istk->mstk && !istk->mstk->in_progress)
+        {
+            /*
+             * We're in a %rep block which has been terminated, so
+             * we're walking through to the %endrep without
+             * emitting anything. Emit nothing at all, not even a
+             * blank line: when we emerge from the %rep block we'll
+             * give a line-number directive so we keep our place
+             * correctly.
+             */
+            free_tlist(tline);
+            continue;
+        }
+        else
+        {
+            tline = expand_smacro(tline);
+            if (!expand_mmacro(tline))
+            {
+                /*
+                 * De-tokenise the line again, and emit it.
+                 */
+                line = detoken(tline, TRUE);
+                free_tlist(tline);
+                break;
+            }
+            else
+            {
+                continue;       /* expand_mmacro calls free_tlist */
+            }
+        }
+    }
+
+    return line;
+}
+
+/*
+ * Release everything the preprocessor still holds at the end of a
+ * pass: a half-finished macro / %rep definition, the context stack,
+ * all multi-line and single-line macros, and any Include entries
+ * still on istk.  When `pass' is 0 the predefined lines (predef) and
+ * the block allocator are released as well.
+ *
+ * Note: the parameter `pass' hides the file-level `pass' variable
+ * that pp_reset() assigns.
+ */
+static void
+pp_cleanup(int pass)
+{
+    int h;
+
+    if (defining)
+    {
+        /*
+         * A %rep block is an MMacro without a name, so name can be
+         * NULL here and must not be handed to "%s".
+         */
+        if (defining->name)
+            error(ERR_NONFATAL, "end of file while still defining macro `%s'",
+                  defining->name);
+        else
+            error(ERR_NONFATAL, "end of file while still in %%rep");
+        free_mmacro(defining);
+        defining = NULL;        /* don't leave a dangling pointer behind */
+    }
+    while (cstk)
+        ctx_pop();
+    for (h = 0; h < NHASH; h++)
+    {
+        while (mmacros[h])
+        {
+            MMacro *m = mmacros[h];
+            mmacros[h] = mmacros[h]->next;
+            free_mmacro(m);
+        }
+        while (smacros[h])
+        {
+            SMacro *s = smacros[h];
+            smacros[h] = smacros[h]->next;
+            nasm_free(s->name);
+            free_tlist(s->expansion);
+            nasm_free(s);
+        }
+    }
+    while (istk)
+    {
+        Include *i = istk;
+        istk = istk->next;
+        if (i->fp)              /* fp is NULL if the fopen in pp_reset failed */
+            fclose(i->fp);
+        nasm_free(i->fname);
+        nasm_free(i);
+    }
+    /*
+     * NOTE(review): repeated from above in the original; kept in case
+     * freeing macros can leave contexts behind -- confirm and drop.
+     */
+    while (cstk)
+        ctx_pop();
+    if (pass == 0)
+    {
+        free_llist(predef);
+        delete_Blocks();
+    }
+}
+
+void
+pp_include_path(char *path)
+{
+    IncPath *i;
+
+    i = nasm_malloc(sizeof(IncPath));
+    i->path =
nasm_strdup(path);
+    i->next = ipath;
+    ipath = i;
+}
+
+/*
+ * Queue a `%include fname' line at the head of the predef list.
+ */
+void
+pp_pre_include(char *fname)
+{
+    Token *inc, *space, *name;
+    Line *l;
+
+    name = new_Token(NULL, TOK_INTERNAL_STRING, fname, 0);
+    space = new_Token(name, TOK_WHITESPACE, NULL, 0);
+    inc = new_Token(space, TOK_PREPROC_ID, "%include", 0);
+
+    l = nasm_malloc(sizeof(Line));
+    l->next = predef;
+    l->first = inc;
+    l->finishes = NULL;
+    predef = l;
+}
+
+/*
+ * Queue a `%define' line at the head of the predef list.
+ * `definition' has the form "name" or "name=value"; the first '='
+ * is turned into a space while the text is tokenised and is put
+ * back afterwards, so the caller's string is unchanged on return.
+ */
+void
+pp_pre_define(char *definition)
+{
+    Token *def, *space;
+    Line *l;
+    char *equals;
+
+    equals = strchr(definition, '=');
+    space = new_Token(NULL, TOK_WHITESPACE, NULL, 0);
+    def = new_Token(space, TOK_PREPROC_ID, "%define", 0);
+    if (equals)
+        *equals = ' ';
+    space->next = tokenise(definition);
+    if (equals)
+        *equals = '=';
+
+    l = nasm_malloc(sizeof(Line));
+    l->next = predef;
+    l->first = def;
+    l->finishes = NULL;
+    predef = l;
+}
+
+/*
+ * Queue a `%undef name' line at the head of the predef list.
+ * `definition' is the macro name to undefine.
+ */
+void
+pp_pre_undefine(char *definition)
+{
+    Token *def, *space;
+    Line *l;
+
+    space = new_Token(NULL, TOK_WHITESPACE, NULL, 0);
+    def = new_Token(space, TOK_PREPROC_ID, "%undef", 0);
+    /*
+     * Attach the name, as pp_pre_define() does; without it the queued
+     * line is a bare `%undef' that names no macro.
+     */
+    space->next = tokenise(definition);
+
+    l = nasm_malloc(sizeof(Line));
+    l->next = predef;
+    l->first = def;
+    l->finishes = NULL;
+    predef = l;
+}
+
+/*
+ * Register an extra table of standard-macro lines.
+ */
+void
+pp_extra_stdmac(const char **macros)
+{
+    extrastdmac = macros;
+}
+
+/*
+ * Turn `tok' into a TOK_NUMBER token whose text is the decimal
+ * representation of `val'.  tok->text is overwritten without being
+ * freed; callers release any previous text themselves.
+ */
+static void
+make_tok_num(Token * tok, long val)
+{
+    /*
+     * 3 decimal digits per byte is a safe upper bound, plus sign and
+     * NUL.  The former 20-byte buffer was one byte short for a
+     * 20-character value such as LONG_MIN of a 64-bit long.
+     */
+    char numbuf[sizeof(long) * 3 + 2];
+    sprintf(numbuf, "%ld", val);
+    tok->text = nasm_strdup(numbuf);
+    tok->type = TOK_NUMBER;
+}
+
+Preproc nasmpp = {
+    pp_reset,
+    pp_getline,
+    pp_cleanup
+};
diff --git a/modules/preprocs/nasm/nasm-pp.h b/modules/preprocs/nasm/nasm-pp.h
new file mode 100644
index 00000000..0b7df114
--- /dev/null
+++ b/modules/preprocs/nasm/nasm-pp.h
@@ -0,0 +1,20 @@
+/* preproc.h  header file for preproc.c
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */ + +#ifndef NASM_PREPROC_H +#define NASM_PREPROC_H + +void pp_include_path (char *); +void pp_pre_include (char *); +void pp_pre_define (char *); +void pp_pre_undefine (char *); +void pp_extra_stdmac (const char **); + +extern Preproc nasmpp; + +#endif diff --git a/modules/preprocs/nasm/nasm.h b/modules/preprocs/nasm/nasm.h new file mode 100644 index 00000000..8f4f293c --- /dev/null +++ b/modules/preprocs/nasm/nasm.h @@ -0,0 +1,850 @@ +/* nasm.h main header file for the Netwide Assembler: inter-module interface + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * initial version: 27/iii/95 by Simon Tatham + */ + +#ifndef NASM_NASM_H +#define NASM_NASM_H + +#include +#include "version.h" /* generated NASM version macros */ + +#ifndef NULL +#define NULL 0 +#endif + +#ifndef FALSE +#define FALSE 0 /* comes in handy */ +#endif +#ifndef TRUE +#define TRUE 1 +#endif + +#define NO_SEG -1L /* null segment value */ +#define SEG_ABS 0x40000000L /* mask for far-absolute segments */ + +#ifndef FILENAME_MAX +#define FILENAME_MAX 256 +#endif + +#ifndef PREFIX_MAX +#define PREFIX_MAX 10 +#endif + +#ifndef POSTFIX_MAX +#define POSTFIX_MAX 10 +#endif + + + +/* + * Name pollution problems: on Digital UNIX pulls in some + * strange hardware header file which sees fit to define R_SP. We + * undefine it here so as not to break the enum below. + */ +#ifdef R_SP +#undef R_SP +#endif + +/* + * We must declare the existence of this structure type up here, + * since we have to reference it before we define it... + */ +struct ofmt; + +/* + * ------------------------- + * Error reporting functions + * ------------------------- + */ + +/* + * An error reporting function should look like this. 
+ */ +typedef void (*efunc) (int severity, const char *fmt, ...); + +/* + * These are the error severity codes which get passed as the first + * argument to an efunc. + */ + +#define ERR_DEBUG 0x00000008 /* put out debugging message */ +#define ERR_WARNING 0x00000000 /* warn only: no further action */ +#define ERR_NONFATAL 0x00000001 /* terminate assembly after phase */ +#define ERR_FATAL 0x00000002 /* instantly fatal: exit with error */ +#define ERR_PANIC 0x00000003 /* internal error: panic instantly + * and dump core for reference */ +#define ERR_MASK 0x0000000F /* mask off the above codes */ +#define ERR_NOFILE 0x00000010 /* don't give source file name/line */ +#define ERR_USAGE 0x00000020 /* print a usage message */ +#define ERR_PASS1 0x00000040 /* only print this error on pass one */ + +/* + * These codes define specific types of suppressible warning. + */ + +#define ERR_WARN_MASK 0x0000FF00 /* the mask for this feature */ +#define ERR_WARN_SHR 8 /* how far to shift right */ + +#define ERR_WARN_MNP 0x00000100 /* macro-num-parameters warning */ +#define ERR_WARN_MSR 0x00000200 /* macro self-reference */ +#define ERR_WARN_OL 0x00000300 /* orphan label (no colon, and + * alone on line) */ +#define ERR_WARN_NOV 0x00000400 /* numeric overflow */ +#define ERR_WARN_GNUELF 0x00000500 /* using GNU ELF extensions */ +#define ERR_WARN_MAX 5 /* the highest numbered one */ + +/* + * ----------------------- + * Other function typedefs + * ----------------------- + */ + +/* + * A label-lookup function should look like this. + */ +typedef int (*lfunc) (char *label, long *segment, long *offset); + +/* + * And a label-definition function like this. The boolean parameter + * `is_norm' states whether the label is a `normal' label (which + * should affect the local-label system), or something odder like + * an EQU or a segment-base symbol, which shouldn't. 
+ */ +typedef void (*ldfunc) (char *label, long segment, long offset, char *special, + int is_norm, int isextrn, struct ofmt *ofmt, + efunc error); + +/* + * List-file generators should look like this: + */ +typedef struct { + /* + * Called to initialise the listing file generator. Before this + * is called, the other routines will silently do nothing when + * called. The `char *' parameter is the file name to write the + * listing to. + */ + void (*init) (char *, efunc); + + /* + * Called to clear stuff up and close the listing file. + */ + void (*cleanup) (void); + + /* + * Called to output binary data. Parameters are: the offset; + * the data; the data type. Data types are similar to the + * output-format interface, only OUT_ADDRESS will _always_ be + * displayed as if it's relocatable, so ensure that any non- + * relocatable address has been converted to OUT_RAWDATA by + * then. Note that OUT_RAWDATA+0 is a valid data type, and is a + * dummy call used to give the listing generator an offset to + * work with when doing things like uplevel(LIST_TIMES) or + * uplevel(LIST_INCBIN). + */ + void (*output) (long, const void *, unsigned long); + + /* + * Called to send a text line to the listing generator. The + * `int' parameter is LIST_READ or LIST_MACRO depending on + * whether the line came directly from an input file or is the + * result of a multi-line macro expansion. + */ + void (*line) (int, char *); + + /* + * Called to change one of the various levelled mechanisms in + * the listing generator. LIST_INCLUDE and LIST_MACRO can be + * used to increase the nesting level of include files and + * macro expansions; LIST_TIMES and LIST_INCBIN switch on the + * two binary-output-suppression mechanisms for large-scale + * pseudo-instructions. + * + * LIST_MACRO_NOLIST is synonymous with LIST_MACRO except that + * it indicates the beginning of the expansion of a `nolist' + * macro, so anything under that level won't be expanded unless + * it includes another file. 
+ */ + void (*uplevel) (int); + + /* + * Reverse the effects of uplevel. + */ + void (*downlevel) (int); +} ListGen; + +/* + * The expression evaluator must be passed a scanner function; a + * standard scanner is provided as part of nasmlib.c. The + * preprocessor will use a different one. Scanners, and the + * token-value structures they return, look like this. + * + * The return value from the scanner is always a copy of the + * `t_type' field in the structure. + */ +struct tokenval { + int t_type; + long t_integer, t_inttwo; + char *t_charptr; +}; +typedef int (*scanner) (void *private_data, struct tokenval *tv); + +/* + * Token types returned by the scanner, in addition to ordinary + * ASCII character values, and zero for end-of-string. + */ +enum { /* token types, other than chars */ + TOKEN_INVALID = -1, /* a placeholder value */ + TOKEN_EOS = 0, /* end of string */ + TOKEN_EQ = '=', TOKEN_GT = '>', TOKEN_LT = '<', /* aliases */ + TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, /* major token types */ + TOKEN_ERRNUM, /* numeric constant with error in */ + TOKEN_HERE, TOKEN_BASE, /* $ and $$ */ + TOKEN_SPECIAL, /* BYTE, WORD, DWORD, FAR, NEAR, etc */ + TOKEN_PREFIX, /* A32, O16, LOCK, REPNZ, TIMES, etc */ + TOKEN_SHL, TOKEN_SHR, /* << and >> */ + TOKEN_SDIV, TOKEN_SMOD, /* // and %% */ + TOKEN_GE, TOKEN_LE, TOKEN_NE, /* >=, <= and <> (!= is same as <>) */ + TOKEN_DBL_AND, TOKEN_DBL_OR, TOKEN_DBL_XOR, /* &&, || and ^^ */ + TOKEN_SEG, TOKEN_WRT, /* SEG and WRT */ + TOKEN_FLOAT /* floating-point constant */ +}; + +typedef struct { + long segment; + long offset; + int known; +} loc_t; + +/* + * Expression-evaluator datatype. Expressions, within the + * evaluator, are stored as an array of these beasts, terminated by + * a record with type==0. Mostly, it's a vector type: each type + * denotes some kind of a component, and the value denotes the + * multiple of that component present in the expression. 
The + * exception is the WRT type, whose `value' field denotes the + * segment to which the expression is relative. These segments will + * be segment-base types, i.e. either odd segment values or SEG_ABS + * types. So it is still valid to assume that anything with a + * `value' field of zero is insignificant. + */ +typedef struct { + long type; /* a register, or EXPR_xxx */ + long value; /* must be >= 32 bits */ +} expr; + +/* + * The evaluator can also return hints about which of two registers + * used in an expression should be the base register. See also the + * `operand' structure. + */ +struct eval_hints { + int base; + int type; +}; + +/* + * The actual expression evaluator function looks like this. When + * called, it expects the first token of its expression to already + * be in `*tv'; if it is not, set tv->t_type to TOKEN_INVALID and + * it will start by calling the scanner. + * + * If a forward reference happens during evaluation, the evaluator + * must set `*fwref' to TRUE if `fwref' is non-NULL. + * + * `critical' is non-zero if the expression may not contain forward + * references. The evaluator will report its own error if this + * occurs; if `critical' is 1, the error will be "symbol not + * defined before use", whereas if `critical' is 2, the error will + * be "symbol undefined". + * + * If `critical' has bit 8 set (in addition to its main value: 0x101 + * and 0x102 correspond to 1 and 2) then an extended expression + * syntax is recognised, in which relational operators such as =, < + * and >= are accepted, as well as low-precedence logical operators + * &&, ^^ and ||. + * + * If `hints' is non-NULL, it gets filled in with some hints as to + * the base register in complex effective addresses. + */ +#define CRITICAL 0x100 +typedef expr *(*evalfunc) (scanner sc, void *scprivate, struct tokenval *tv, + int *fwref, int critical, efunc error, + struct eval_hints *hints); + +/* + * Special values for expr->type. 
ASSUMPTION MADE HERE: the number + * of distinct register names (i.e. possible "type" fields for an + * expr structure) does not exceed 124 (EXPR_REG_START through + * EXPR_REG_END). + */ +#define EXPR_REG_START 1 +#define EXPR_REG_END 124 +#define EXPR_UNKNOWN 125L /* for forward references */ +#define EXPR_SIMPLE 126L +#define EXPR_WRT 127L +#define EXPR_SEGBASE 128L + +/* + * Preprocessors ought to look like this: + */ +typedef struct { + /* + * Called at the start of a pass; given a file name, the number + * of the pass, an error reporting function, an evaluator + * function, and a listing generator to talk to. + */ + void (*reset) (char *, int, efunc, evalfunc, ListGen *); + + /* + * Called to fetch a line of preprocessed source. The line + * returned has been malloc'ed, and so should be freed after + * use. + */ + char *(*getline) (void); + + /* + * Called at the end of a pass. + */ + void (*cleanup) (int); +} Preproc; + +/* + * ---------------------------------------------------------------- + * Some lexical properties of the NASM source language, included + * here because they are shared between the parser and preprocessor + * ---------------------------------------------------------------- + */ + +/* + * isidstart matches any character that may start an identifier, and isidchar + * matches any character that may appear at places other than the start of an + * identifier. E.g. a period may only appear at the start of an identifier + * (for local labels), whereas a number may appear anywhere *but* at the + * start. + */ + +#define isidstart(c) ( isalpha(c) || (c)=='_' || (c)=='.' || (c)=='?' \ + || (c)=='@' ) +#define isidchar(c) ( isidstart(c) || isdigit(c) || (c)=='$' || (c)=='#' \ + || (c)=='~' ) + +/* Ditto for numeric constants. */ + +#define isnumstart(c) ( isdigit(c) || (c)=='$' ) +#define isnumchar(c) ( isalnum(c) ) + +/* This returns the numeric value of a given 'digit'. */ + +#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? 
(c)-'A'+10 : (c)-'0') + +/* + * Data-type flags that get passed to listing-file routines. + */ +enum { + LIST_READ, LIST_MACRO, LIST_MACRO_NOLIST, LIST_INCLUDE, + LIST_INCBIN, LIST_TIMES +}; + +/* + * ----------------------------------------------------------- + * Format of the `insn' structure returned from `parser.c' and + * passed into `assemble.c' + * ----------------------------------------------------------- + */ + +/* + * Here we define the operand types. These are implemented as bit + * masks, since some are subsets of others; e.g. AX in a MOV + * instruction is a special operand type, whereas AX in other + * contexts is just another 16-bit register. (Also, consider CL in + * shift instructions, DX in OUT, etc.) + */ + +/* size, and other attributes, of the operand */ +#define BITS8 0x00000001L +#define BITS16 0x00000002L +#define BITS32 0x00000004L +#define BITS64 0x00000008L /* FPU only */ +#define BITS80 0x00000010L /* FPU only */ +#define FAR 0x00000020L /* grotty: this means 16:16 or */ + /* 16:32, like in CALL/JMP */ +#define NEAR 0x00000040L +#define SHORT 0x00000080L /* and this means what it says :) */ + +#define SIZE_MASK 0x000000FFL /* all the size attributes */ +#define NON_SIZE (~SIZE_MASK) + +#define TO 0x00000100L /* reverse effect in FADD, FSUB &c */ +#define COLON 0x00000200L /* operand is followed by a colon */ +#define STRICT 0x00000400L /* do not optimize this operand */ + +/* type of operand: memory reference, register, etc. 
*/ +#define MEMORY 0x00204000L +#define REGISTER 0x00001000L /* register number in 'basereg' */ +#define IMMEDIATE 0x00002000L + +#define REGMEM 0x00200000L /* for r/m, ie EA, operands */ +#define REGNORM 0x00201000L /* 'normal' reg, qualifies as EA */ +#define REG8 0x00201001L +#define REG16 0x00201002L +#define REG32 0x00201004L +#define MMXREG 0x00201008L /* MMX registers */ +#define XMMREG 0x00201010L /* XMM Katmai reg */ +#define FPUREG 0x01000000L /* floating point stack registers */ +#define FPU0 0x01000800L /* FPU stack register zero */ + +/* special register operands: these may be treated differently */ +#define REG_SMASK 0x00070000L /* a mask for the following */ +#define REG_ACCUM 0x00211000L /* accumulator: AL, AX or EAX */ +#define REG_AL 0x00211001L /* REG_ACCUM | BITSxx */ +#define REG_AX 0x00211002L /* ditto */ +#define REG_EAX 0x00211004L /* and again */ +#define REG_COUNT 0x00221000L /* counter: CL, CX or ECX */ +#define REG_CL 0x00221001L /* REG_COUNT | BITSxx */ +#define REG_CX 0x00221002L /* ditto */ +#define REG_ECX 0x00221004L /* another one */ +#define REG_DX 0x00241002L +#define REG_SREG 0x00081002L /* any segment register */ +#define REG_CS 0x01081002L /* CS */ +#define REG_DESS 0x02081002L /* DS, ES, SS (non-CS 86 registers) */ +#define REG_FSGS 0x04081002L /* FS, GS (386 extended registers) */ +#define REG_SEG67 0x08081002L /* Non-implemented segment registers */ +#define REG_CDT 0x00101004L /* CRn, DRn and TRn */ +#define REG_CREG 0x08101004L /* CRn */ +#define REG_DREG 0x10101004L /* DRn */ +#define REG_TREG 0x20101004L /* TRn */ + +/* special type of EA */ +#define MEM_OFFS 0x00604000L /* simple [address] offset */ + +/* special type of immediate operand */ +#define ONENESS 0x00800000L /* so UNITY == IMMEDIATE | ONENESS */ +#define UNITY 0x00802000L /* for shift/rotate instructions */ +#define BYTENESS 0x40000000L /* so SBYTE == IMMEDIATE | BYTENESS */ +#define SBYTE 0x40002000L /* for op r16/32,immediate instrs. 
*/ + +/* Register names automatically generated from regs.dat */ +#include "regs.h" + +enum { /* condition code names */ + C_A, C_AE, C_B, C_BE, C_C, C_E, C_G, C_GE, C_L, C_LE, C_NA, C_NAE, + C_NB, C_NBE, C_NC, C_NE, C_NG, C_NGE, C_NL, C_NLE, C_NO, C_NP, + C_NS, C_NZ, C_O, C_P, C_PE, C_PO, C_S, C_Z +}; + +/* + * Note that because segment registers may be used as instruction + * prefixes, we must ensure the enumerations for prefixes and + * register names do not overlap. + */ +enum { /* instruction prefixes */ + PREFIX_ENUM_START = REG_ENUM_LIMIT, + P_A16 = PREFIX_ENUM_START, P_A32, P_LOCK, P_O16, P_O32, P_REP, P_REPE, + P_REPNE, P_REPNZ, P_REPZ, P_TIMES +}; + +enum { /* extended operand types */ + EOT_NOTHING, EOT_DB_STRING, EOT_DB_NUMBER +}; + +enum { /* special EA flags */ + EAF_BYTEOFFS = 1, /* force offset part to byte size */ + EAF_WORDOFFS = 2, /* force offset part to [d]word size */ + EAF_TIMESTWO = 4 /* really do EAX*2 not EAX+EAX */ +}; + +enum { /* values for `hinttype' */ + EAH_NOHINT = 0, /* no hint at all - our discretion */ + EAH_MAKEBASE = 1, /* try to make given reg the base */ + EAH_NOTBASE = 2 /* try _not_ to make reg the base */ +}; + +typedef struct { /* operand to an instruction */ + long type; /* type of operand */ + int addr_size; /* 0 means default; 16; 32 */ + int basereg, indexreg, scale; /* registers and scale involved */ + int hintbase, hinttype; /* hint as to real base register */ + long segment; /* immediate segment, if needed */ + long offset; /* any immediate number */ + long wrt; /* segment base it's relative to */ + int eaflags; /* special EA flags */ + int opflags; /* see OPFLAG_* defines below */ +} operand; + +#define OPFLAG_FORWARD 1 /* operand is a forward reference */ +#define OPFLAG_EXTERN 2 /* operand is an external reference */ + +typedef struct extop { /* extended operand */ + struct extop *next; /* linked list */ + long type; /* defined above */ + char *stringval; /* if it's a string, then here it is */ + int stringlen; 
/* ... and here's how long it is */ + long segment; /* if it's a number/address, then... */ + long offset; /* ... it's given here ... */ + long wrt; /* ... and here */ +} extop; + +#define MAXPREFIX 4 + +typedef struct { /* an instruction itself */ + char *label; /* the label defined, or NULL */ + int prefixes[MAXPREFIX]; /* instruction prefixes, if any */ + int nprefix; /* number of entries in above */ + int opcode; /* the opcode - not just the string */ + int condition; /* the condition code, if Jcc/SETcc */ + int operands; /* how many operands? 0-3 + * (more if db et al) */ + operand oprs[3]; /* the operands, defined as above */ + extop *eops; /* extended operands */ + int eops_float; /* true if DD and floating */ + long times; /* repeat count (TIMES prefix) */ + int forw_ref; /* is there a forward reference? */ +} insn; + +enum geninfo { GI_SWITCH }; +/* + * ------------------------------------------------------------ + * The data structure defining an output format driver, and the + * interfaces to the functions therein. + * ------------------------------------------------------------ + */ + +struct ofmt { + /* + * This is a short (one-liner) description of the type of + * output generated by the driver. + */ + const char *fullname; + + /* + * This is a single keyword used to select the driver. + */ + const char *shortname; + + /* + * this is reserved for out module specific help. + * It is set to NULL in all the out modules but is not implemented + * in the main program + */ + const char *helpstring; + + /* + * this is a pointer to the first element of the debug information + */ + struct dfmt **debug_formats; + + /* + * and a pointer to the element that is being used + * note: this is set to the default at compile time and changed if the + * -F option is selected. 
If developing a set of new debug formats for + * an output format, be sure to set this to whatever default you want + * + */ + struct dfmt *current_dfmt; + + /* + * This, if non-NULL, is a NULL-terminated list of `char *'s + * pointing to extra standard macros supplied by the object + * format (e.g. a sensible initial default value of __SECT__, + * and user-level equivalents for any format-specific + * directives). + */ + const char **stdmac; + + /* + * This procedure is called at the start of an output session. + * It tells the output format what file it will be writing to, + * what routine to report errors through, and how to interface + * to the label manager and expression evaluator if necessary. + * It also gives it a chance to do other initialisation. + */ + void (*init) (FILE *fp, efunc error, ldfunc ldef, evalfunc eval); + + /* + * This procedure is called to pass generic information to the + * object file. The first parameter gives the information type + * (currently only command line switches) + * and the second parameter gives the value. This function returns + * 1 if recognized, 0 if unrecognized + */ + int (*setinfo)(enum geninfo type, char **string); + + /* + * This procedure is called by assemble() to write actual + * generated code or data to the object file. Typically it + * doesn't have to actually _write_ it, just store it for + * later. + * + * The `type' argument specifies the type of output data, and + * usually the size as well: its contents are described below. + */ + void (*output) (long segto, const void *data, unsigned long type, + long segment, long wrt); + + /* + * This procedure is called once for every symbol defined in + * the module being assembled. It gives the name and value of + * the symbol, in NASM's terms, and indicates whether it has + * been declared to be global. 
Note that the parameter "name", + * when passed, will point to a piece of static storage + * allocated inside the label manager - it's safe to keep using + * that pointer, because the label manager doesn't clean up + * until after the output driver has. + * + * Values of `is_global' are: 0 means the symbol is local; 1 + * means the symbol is global; 2 means the symbol is common (in + * which case `offset' holds the _size_ of the variable). + * Anything else is available for the output driver to use + * internally. + * + * This routine explicitly _is_ allowed to call the label + * manager to define further symbols, if it wants to, even + * though it's been called _from_ the label manager. That much + * re-entrancy is guaranteed in the label manager. However, the + * label manager will in turn call this routine, so it should + * be prepared to be re-entrant itself. + * + * The `special' parameter contains special information passed + * through from the command that defined the label: it may have + * been an EXTERN, a COMMON or a GLOBAL. The distinction should + * be obvious to the output format from the other parameters. + */ + void (*symdef) (char *name, long segment, long offset, int is_global, + char *special); + + /* + * This procedure is called when the source code requests a + * segment change. It should return the corresponding segment + * _number_ for the name, or NO_SEG if the name is not a valid + * segment name. + * + * It may also be called with NULL, in which case it is to + * return the _default_ section number for starting assembly in. + * + * It is allowed to modify the string it is given a pointer to. + * + * It is also allowed to specify a default instruction size for + * the segment, by setting `*bits' to 16 or 32. Or, if it + * doesn't wish to define a default, it can leave `bits' alone. 
+ */ + long (*section) (char *name, int pass, int *bits); + + /* + * This procedure is called to modify the segment base values + * returned from the SEG operator. It is given a segment base + * value (i.e. a segment value with the low bit set), and is + * required to produce in return a segment value which may be + * different. It can map segment bases to absolute numbers by + * means of returning SEG_ABS types. + * + * It should return NO_SEG if the segment base cannot be + * determined; the evaluator (which calls this routine) is + * responsible for throwing an error condition if that occurs + * in pass two or in a critical expression. + */ + long (*segbase) (long segment); + + /* + * This procedure is called to allow the output driver to + * process its own specific directives. When called, it has the + * directive word in `directive' and the parameter string in + * `value'. It is called in both assembly passes, and `pass' + * will be either 1 or 2. + * + * This procedure should return zero if it does not _recognise_ + * the directive, so that the main program can report an error. + * If it recognises the directive but then has its own errors, + * it should report them itself and then return non-zero. It + * should also return non-zero if it correctly processes the + * directive. + */ + int (*directive) (char *directive, char *value, int pass); + + /* + * This procedure is called before anything else - even before + * the "init" routine - and is passed the name of the input + * file from which this output file is being generated. It + * should return its preferred name for the output file in + * `outname', if outname[0] is not '\0', and do nothing to + * `outname' otherwise. Since it is called before the driver is + * properly initialised, it has to be passed its error handler + * separately. 
+ * + * This procedure may also take its own copy of the input file + * name for use in writing the output file: it is _guaranteed_ + * that it will be called before the "init" routine. + * + * The parameter `outname' points to an area of storage + * guaranteed to be at least FILENAME_MAX in size. + */ + void (*filename) (char *inname, char *outname, efunc error); + + /* + * This procedure is called after assembly finishes, to allow + * the output driver to clean itself up and free its memory. + * Typically, it will also be the point at which the object + * file actually gets _written_. + * + * One thing the cleanup routine should always do is to close + * the output file pointer. + */ + void (*cleanup) (int debuginfo); +}; + +/* + * values for the `type' parameter to an output function. Each one + * must have the actual number of _bytes_ added to it. + * + * Exceptions are OUT_RELxADR, which denote an x-byte relocation + * which will be a relative jump. For this we need to know the + * distance in bytes from the start of the relocated record until + * the end of the containing instruction. _This_ is what is stored + * in the size part of the parameter, in this case. + * + * Also OUT_RESERVE denotes reservation of N bytes of BSS space, + * and the contents of the "data" parameter is irrelevant. + * + * The "data" parameter for the output function points to a "long", + * containing the address in question, unless the type is + * OUT_RAWDATA, in which case it points to an "unsigned char" + * array. + */ +#define OUT_RAWDATA 0x00000000UL +#define OUT_ADDRESS 0x10000000UL +#define OUT_REL2ADR 0x20000000UL +#define OUT_REL4ADR 0x30000000UL +#define OUT_RESERVE 0x40000000UL +#define OUT_TYPMASK 0xF0000000UL +#define OUT_SIZMASK 0x0FFFFFFFUL + +/* + * ------------------------------------------------------------ + * The data structure defining a debug format driver, and the + * interfaces to the functions therein. 
+ * ------------------------------------------------------------ + */ + +struct dfmt { + + /* + * This is a short (one-liner) description of the type of + * output generated by the driver. + */ + const char *fullname; + + /* + * This is a single keyword used to select the driver. + */ + const char *shortname; + + + /* + * init - called initially to set up local pointer to object format, + * void pointer to implementation defined data, file pointer (which + * probably won't be used, but who knows?), and error function. + */ + void (*init) (struct ofmt * of, void * id, FILE * fp, efunc error); + + /* + * linenum - called any time there is output with a change of + * line number or file. + */ + void (*linenum) (const char * filename, long linenumber, long segto); + + /* + * debug_deflabel - called whenever a label is defined. Parameters + * are the same as to 'symdef()' in the output format. This function + * would be called before the output format version. + */ + + void (*debug_deflabel) (char * name, long segment, long offset, + int is_global, char * special); + /* + * debug_directive - called whenever a DEBUG directive other than 'LINE' + * is encountered. 'directive' contains the first parameter to the + * DEBUG directive, and params contains the rest. For example, + * 'DEBUG VAR _somevar:int' would translate to a call to this + * function with 'directive' equal to "VAR" and 'params' equal to + * "_somevar:int". + */ + void (*debug_directive) (const char * directive, const char * params); + + /* + * typevalue - called whenever the assembler wishes to register a type + * for the last defined label. This routine MUST detect if a type was + * already registered and not re-register it. 
+ */ + void (*debug_typevalue) (long type); + + /* + * debug_output - called whenever output is required + * 'type' is the type of info required, and this is format-specific + */ + void (*debug_output) (int type, void *param); + + /* + * cleanup - called after processing of file is complete + */ + void (*cleanup) (void); + +}; +/* + * The type definition macros + * for debugging + * + * low 3 bits: reserved + * next 5 bits: type + * next 24 bits: number of elements for arrays (0 for labels) + */ + +#define TY_UNKNOWN 0x00 +#define TY_LABEL 0x08 +#define TY_BYTE 0x10 +#define TY_WORD 0x18 +#define TY_DWORD 0x20 +#define TY_FLOAT 0x28 +#define TY_QWORD 0x30 +#define TY_TBYTE 0x38 +#define TY_COMMON 0xE0 +#define TY_SEG 0xE8 +#define TY_EXTERN 0xF0 +#define TY_EQU 0xF8 + +#define TYM_TYPE(x) ((x) & 0xF8) +#define TYM_ELEMENTS(x) (((x) & 0xFFFFFF00) >> 8) + +#define TYS_ELEMENTS(x) ((x) << 8) +/* + * ----- + * Other + * ----- + */ + +/* + * This is a useful #define which I keep meaning to use more often: + * the number of elements of a statically defined array. + */ + +#define elements(x) ( sizeof(x) / sizeof(*(x)) ) + +extern int tasm_compatible_mode; + +/* + * This declaration passes the "pass" number to all other modules + * "pass0" assumes the values: 0, 0, ..., 0, 1, 2 + * where 0 = optimizing pass + * 1 = pass 1 + * 2 = pass 2 + */ + +extern int pass0; /* this is globally known */ +extern int optimizing; + +#endif diff --git a/modules/preprocs/nasm/nasmlib.c b/modules/preprocs/nasm/nasmlib.c new file mode 100644 index 00000000..bb0ab29e --- /dev/null +++ b/modules/preprocs/nasm/nasmlib.c @@ -0,0 +1,1116 @@ +/* nasmlib.c library routines for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "insns.h"		/* For MAX_KEYWORD */
+
+/*
+ * Error reporter shared by the routines in this file; set by
+ * nasm_set_malloc_error().
+ */
+static efunc nasm_malloc_error;
+
+#ifdef LOGALLOC
+static FILE *logfp;		/* allocation trace, opened in nasm_set_malloc_error() */
+#endif
+
+/*
+ * Install `error' as the function called when an allocation fails.
+ * When LOGALLOC is defined this also opens the line-buffered trace
+ * file "malloc.log"; if it cannot be opened the trace goes to stderr,
+ * because every logging call below writes to logfp unconditionally.
+ */
+void nasm_set_malloc_error (efunc error)
+{
+    nasm_malloc_error = error;
+#ifdef LOGALLOC
+    logfp = fopen ("malloc.log", "w");
+    if (!logfp)
+        logfp = stderr;
+    else
+        setvbuf (logfp, NULL, _IOLBF, BUFSIZ);
+    /* %p requires a void *; NULL may be a plain 0 (see nasm.h) */
+    fprintf (logfp, "null pointer is %p\n", (void *)NULL);
+#endif
+}
+
+/*
+ * malloc() wrapper: reports "out of memory" through the installed
+ * error function when the allocation fails. NULL is returned only if
+ * that function itself returns.
+ */
+#ifdef LOGALLOC
+void *nasm_malloc_log (char *file, int line, size_t size)
+#else
+void *nasm_malloc (size_t size)
+#endif
+{
+    void *p = malloc(size);
+    if (!p)
+        nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory");
+#ifdef LOGALLOC
+    else
+        fprintf(logfp, "%s %d malloc(%ld) returns %p\n",
+                file, line, (long)size, p);
+#endif
+    return p;
+}
+
+/*
+ * realloc() wrapper with the same failure reporting as nasm_malloc.
+ * A NULL `q' is passed to malloc() rather than realloc().
+ */
+#ifdef LOGALLOC
+void *nasm_realloc_log (char *file, int line, void *q, size_t size)
+#else
+void *nasm_realloc (void *q, size_t size)
+#endif
+{
+    void *p = q ? realloc(q, size) : malloc(size);
+    if (!p)
+        nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory");
+#ifdef LOGALLOC
+    else if (q)
+        fprintf(logfp, "%s %d realloc(%p,%ld) returns %p\n",
+                file, line, q, (long)size, p);
+    else
+        fprintf(logfp, "%s %d malloc(%ld) returns %p\n",
+                file, line, (long)size, p);
+#endif
+    return p;
+}
+
+/*
+ * free() wrapper; a NULL `q' is ignored (and not logged).
+ */
+#ifdef LOGALLOC
+void nasm_free_log (char *file, int line, void *q)
+#else
+void nasm_free (void *q)
+#endif
+{
+    if (q) {
+        free (q);
+#ifdef LOGALLOC
+        fprintf(logfp, "%s %d free(%p)\n",
+                file, line, q);
+#endif
+    }
+}
+
+/*
+ * Return a freshly allocated copy of the NUL-terminated string `s'.
+ * Allocation failure is reported like nasm_malloc; NULL is returned
+ * only if the error function returns.
+ */
+#ifdef LOGALLOC
+char *nasm_strdup_log (char *file, int line, const char *s)
+#else
+char *nasm_strdup (const char *s)
+#endif
+{
+    char *p;
+    size_t size = strlen(s) + 1;	/* size_t: an int could truncate the length */
+
+    p = malloc(size);
+    if (!p) {
+        nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory");
+        return NULL;		/* never copy into a null pointer */
+    }
+#ifdef LOGALLOC
+    fprintf(logfp, "%s %d strdup(%ld) returns %p\n",
+            file, line, (long)size, (void *)p);
+#endif
+    memcpy (p, s, size);	/* size includes the terminating NUL */
+    return p;
+}
+
+#ifdef LOGALLOC
+char *nasm_strndup_log (char *file, int
line, char *s, size_t len)
+#else
+char *nasm_strndup (char *s, size_t len)
+#endif
+{
+    /*
+     * Returns a freshly allocated, NUL-terminated copy of at most
+     * `len' characters of `s'. NULL is returned only if the error
+     * function returns after an allocation failure.
+     */
+    char *p;
+    size_t size = len + 1;	/* size_t: an int could truncate the length */
+
+    p = malloc(size);
+    if (!p) {
+        nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory");
+        return NULL;		/* never copy into a null pointer */
+    }
+#ifdef LOGALLOC
+    fprintf(logfp, "%s %d strndup(%ld) returns %p\n",
+            file, line, (long)size, (void *)p);
+#endif
+    strncpy (p, s, len);	/* stops at a NUL in `s' and zero-fills the rest */
+    p[len] = '\0';
+    return p;
+}
+
+#if !defined(stricmp) && !defined(strcasecmp)
+/*
+ * Case-insensitive comparison of two strings: 0 if they match, -1 if
+ * the first differing character of s1 is the smaller one, 1 otherwise.
+ * Characters are converted to unsigned char before tolower(), as
+ * <ctype.h> requires.
+ */
+int nasm_stricmp (const char *s1, const char *s2)
+{
+    while (*s1 && tolower((unsigned char)*s1) == tolower((unsigned char)*s2))
+        s1++, s2++;
+    if (!*s1 && !*s2)
+        return 0;
+    else if (tolower((unsigned char)*s1) < tolower((unsigned char)*s2))
+        return -1;
+    else
+        return 1;
+}
+#endif
+
+#if !defined(strnicmp) && !defined(strncasecmp)
+/*
+ * As nasm_stricmp, but looks at no more than `n' characters; strings
+ * that agree over the first `n' characters compare equal.
+ */
+int nasm_strnicmp (const char *s1, const char *s2, int n)
+{
+    while (n > 0 && *s1 &&
+           tolower((unsigned char)*s1) == tolower((unsigned char)*s2))
+        s1++, s2++, n--;
+    if ((!*s1 && !*s2) || n==0)
+        return 0;
+    else if (tolower((unsigned char)*s1) < tolower((unsigned char)*s2))
+        return -1;
+    else
+        return 1;
+}
+#endif
+
+#define lib_isnumchar(c) ( isalnum((unsigned char)(c)) || (c) == '$')
+/* Repeats the definition in nasm.h; keep the two token-for-token identical. */
+#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
+
+/*
+ * Convert the numeric constant at `str' to a long.
+ *
+ * Leading white space and one optional '-' are skipped; the radix is
+ * chosen from the prefix or suffix as described below. `*error' is
+ * set to TRUE, and 0 is returned, when the text is not a well-formed
+ * number; otherwise `*error' is FALSE.
+ */
+long readnum (char *str, int *error)
+{
+    char *r = str, *q;
+    long radix;
+    unsigned long result, checklimit;
+    int digit, last;
+    int warn = FALSE;
+    int sign = 1;
+
+    *error = FALSE;
+
+    while (isspace((unsigned char)*r)) r++;	/* find start of number */
+
+    /*
+     * If the number came from make_tok_num (as a result of an %assign), it
+     * might have a '-' built into it (rather than in a preceding token).
+     */
+    if (*r == '-')
+    {
+        r++;
+        sign = -1;
+    }
+
+    q = r;
+
+    while (lib_isnumchar(*q)) q++;	/* find end of number */
+
+    /*
+     * Nothing that could be part of a number: fail now. The suffix
+     * tests below examine q[-1], which would lie before the start of
+     * the buffer when r == str.
+     */
+    if (q == r) {
+        *error = TRUE;
+        return 0;
+    }
+
+    /*
+     * If it begins 0x, 0X or $, or ends in H, it's in hex. if it
+     * ends in Q, it's octal. if it ends in B, it's binary.
+     * Otherwise, it's ordinary decimal.
+     */
+    if (*r=='0' && (r[1]=='x' || r[1]=='X'))
+        radix = 16, r += 2;
+    else if (*r=='$')
+        radix = 16, r++;
+    else if (q[-1]=='H' || q[-1]=='h')
+        radix = 16 , q--;
+    else if (q[-1]=='Q' || q[-1]=='q')
+        radix = 8 , q--;
+    else if (q[-1]=='B' || q[-1]=='b')
+        radix = 2 , q--;
+    else
+        radix = 10;
+
+    /*
+     * If this number has been found for us by something other than
+     * the ordinary scanners, then it might be malformed by having
+     * nothing between the prefix and the suffix. Check this case
+     * now.
+     */
+    if (r >= q) {
+        *error = TRUE;
+        return 0;
+    }
+
+    /*
+     * `checklimit' must be 2**32 / radix. We can't do that in
+     * 32-bit arithmetic, which we're (probably) using, so we
+     * cheat: since we know that all radices we use are even, we
+     * can divide 2**31 by radix/2 instead.
+     */
+    checklimit = 0x80000000UL / (radix>>1);
+
+    /*
+     * Calculate the highest allowable value for the last digit
+     * of a 32 bit constant... in radix 10, it is 6, otherwise it is 0
+     */
+    last = (radix == 10 ?
6 : 0); + + result = 0; + while (*r && r < q) { + if (*r<'0' || (*r>'9' && *r<'A') || (digit = numvalue(*r)) >= radix) + { + *error = TRUE; + return 0; + } + if (result > checklimit || + (result == checklimit && digit >= last)) + { + warn = TRUE; + } + + result = radix * result + digit; + r++; + } + + if (warn) + nasm_malloc_error (ERR_WARNING | ERR_PASS1 | ERR_WARN_NOV, + "numeric constant %s does not fit in 32 bits", + str); + + return result*sign; +} + +long readstrnum (char *str, int length, int *warn) +{ + long charconst = 0; + int i; + + *warn = FALSE; + + str += length; + for (i=0; i> 8) & 255), fp); +} + +void fwritelong (long data, FILE *fp) +{ + fputc ((int) (data & 255), fp); + fputc ((int) ((data >> 8) & 255), fp); + fputc ((int) ((data >> 16) & 255), fp); + fputc ((int) ((data >> 24) & 255), fp); +} + +void standard_extension (char *inname, char *outname, char *extension, + efunc error) +{ + char *p, *q; + + if (*outname) /* file name already exists, */ + return; /* so do nothing */ + q = inname; + p = outname; + while (*q) *p++ = *q++; /* copy, and find end of string */ + *p = '\0'; /* terminate it */ + while (p > outname && *--p != '.');/* find final period (or whatever) */ + if (*p != '.') while (*p) p++; /* go back to end if none found */ + if (!strcmp(p, extension)) { /* is the extension already there? */ + if (*extension) + error(ERR_WARNING | ERR_NOFILE, + "file name already ends in `%s': " + "output will be in `nasm.out'", + extension); + else + error(ERR_WARNING | ERR_NOFILE, + "file name already has no extension: " + "output will be in `nasm.out'"); + strcpy(outname, "nasm.out"); + } else + strcpy(p, extension); +} + +#define LEAFSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_LEAF)) +#define BRANCHSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_BRANCH)) + +#define LAYERSIZ(r) ( (r)->layers==0 ? 
RAA_BLKSIZE : RAA_LAYERSIZE )
+
+/*
+ * Allocate a single RAA node. `layers' is the number of levels that
+ * lie below the new node: 0 produces a zero-filled leaf, any other
+ * value produces a branch whose child pointers all start out NULL.
+ */
+static struct RAA *real_raa_init (int layers)
+{
+    struct RAA *r;
+    int i;
+
+    if (layers == 0) {
+        r = nasm_malloc (LEAFSIZ);
+        r->layers = 0;
+        memset (r->u.l.data, 0, sizeof(r->u.l.data));
+        r->stepsize = 1L;
+    } else {
+        r = nasm_malloc (BRANCHSIZ);
+        r->layers = layers;
+        for ( i = 0 ; i < RAA_LAYERSIZE ; i++ )
+            r->u.b.data[i] = NULL;
+        /* stepsize = RAA_BLKSIZE * RAA_LAYERSIZE ** (layers-1) */
+        r->stepsize = RAA_BLKSIZE;
+        while (--layers)
+            r->stepsize *= RAA_LAYERSIZE;
+    }
+    return r;
+}
+
+/*
+ * Create an empty array; it starts life as one leaf.
+ */
+struct RAA *raa_init (void)
+{
+    return real_raa_init (0);
+}
+
+/*
+ * Release a whole RAA tree. For a branch node every non-NULL child
+ * is released recursively first; the node itself is then freed in
+ * all cases. (Freeing only leaves, as was done before, leaked every
+ * branch node of a multi-layer array.)
+ */
+void raa_free (struct RAA *r)
+{
+    if (r->layers != 0) {
+        struct RAA **p;
+        for (p = r->u.b.data; p - r->u.b.data < RAA_LAYERSIZE; p++)
+            if (*p)
+                raa_free (*p);
+    }
+    nasm_free (r);
+}
+
+/*
+ * Fetch the entry stored at `posn'. Positions that were never
+ * written, positions beyond the current extent of the tree and
+ * negative positions all read as 0.
+ */
+long raa_read (struct RAA *r, long posn)
+{
+    /* a negative index would otherwise reach before the leaf data */
+    if (posn < 0 || posn >= r->stepsize * LAYERSIZ(r))
+        return 0;                      /* Return 0 for undefined entries */
+    while (r->layers > 0) {
+        ldiv_t l;
+        l = ldiv (posn, r->stepsize);  /* quot: child slot, rem: offset in it */
+        r = r->u.b.data[l.quot];
+        posn = l.rem;
+        if (!r)
+            return 0;                  /* Return 0 for undefined entries */
+    }
+    return r->u.l.data[posn];
+}
+
+/*
+ * Store `value' at `posn', adding layers above the current root for
+ * as long as `posn' lies outside the range the tree can address.
+ */
+struct RAA *raa_write (struct RAA *r, long posn, long value)
+{
+    struct RAA *result;
+
+    if (posn < 0)
+        nasm_malloc_error (ERR_PANIC, "negative position in raa_write");
+
+    while (r->stepsize * LAYERSIZ(r) <= posn) {
+        /*
+         * Must add a layer.
+ */ + struct RAA *s; + int i; + + s = nasm_malloc (BRANCHSIZ); + for ( i = 0 ; i < RAA_LAYERSIZE ; i++ ) + s->u.b.data[i] = NULL; + s->layers = r->layers + 1; + s->stepsize = LAYERSIZ(r) * r->stepsize; + s->u.b.data[0] = r; + r = s; + } + + result = r; + + while (r->layers > 0) { + ldiv_t l; + struct RAA **s; + l = ldiv (posn, r->stepsize); + s = &r->u.b.data[l.quot]; + if (!*s) + *s = real_raa_init (r->layers - 1); + r = *s; + posn = l.rem; + } + + r->u.l.data[posn] = value; + + return result; +} + +#define SAA_MAXLEN 8192 + +struct SAA *saa_init (long elem_len) +{ + struct SAA *s; + + if (elem_len > SAA_MAXLEN) + nasm_malloc_error (ERR_PANIC | ERR_NOFILE, "SAA with huge elements"); + + s = nasm_malloc (sizeof(struct SAA)); + s->posn = s->start = 0L; + s->elem_len = elem_len; + s->length = SAA_MAXLEN - (SAA_MAXLEN % elem_len); + s->data = nasm_malloc (s->length); + s->next = NULL; + s->end = s; + + return s; +} + +void saa_free (struct SAA *s) +{ + struct SAA *t; + + while (s) { + t = s->next; + nasm_free (s->data); + nasm_free (s); + s = t; + } +} + +void *saa_wstruct (struct SAA *s) +{ + void *p; + + if (s->end->length - s->end->posn < s->elem_len) { + s->end->next = nasm_malloc (sizeof(struct SAA)); + s->end->next->start = s->end->start + s->end->posn; + s->end = s->end->next; + s->end->length = s->length; + s->end->next = NULL; + s->end->posn = 0L; + s->end->data = nasm_malloc (s->length); + } + + p = s->end->data + s->end->posn; + s->end->posn += s->elem_len; + return p; +} + +void saa_wbytes (struct SAA *s, const void *data, long len) +{ + const char *d = data; + + while (len > 0) { + long l = s->end->length - s->end->posn; + if (l > len) + l = len; + if (l > 0) { + if (d) { + memcpy (s->end->data + s->end->posn, d, l); + d += l; + } else + memset (s->end->data + s->end->posn, 0, l); + s->end->posn += l; + len -= l; + } + if (len > 0) { + s->end->next = nasm_malloc (sizeof(struct SAA)); + s->end->next->start = s->end->start + s->end->posn; + s->end = 
s->end->next; + s->end->length = s->length; + s->end->next = NULL; + s->end->posn = 0L; + s->end->data = nasm_malloc (s->length); + } + } +} + +void saa_rewind (struct SAA *s) +{ + s->rptr = s; + s->rpos = 0L; +} + +void *saa_rstruct (struct SAA *s) +{ + void *p; + + if (!s->rptr) + return NULL; + + if (s->rptr->posn - s->rpos < s->elem_len) { + s->rptr = s->rptr->next; + if (!s->rptr) + return NULL; /* end of array */ + s->rpos = 0L; + } + + p = s->rptr->data + s->rpos; + s->rpos += s->elem_len; + return p; +} + +void *saa_rbytes (struct SAA *s, long *len) +{ + void *p; + + if (!s->rptr) + return NULL; + + p = s->rptr->data + s->rpos; + *len = s->rptr->posn - s->rpos; + s->rptr = s->rptr->next; + s->rpos = 0L; + return p; +} + +void saa_rnbytes (struct SAA *s, void *data, long len) +{ + char *d = data; + + while (len > 0) { + long l; + + if (!s->rptr) + return; + + l = s->rptr->posn - s->rpos; + if (l > len) + l = len; + if (l > 0) { + memcpy (d, s->rptr->data + s->rpos, l); + d += l; + s->rpos += l; + len -= l; + } + if (len > 0) { + s->rptr = s->rptr->next; + s->rpos = 0L; + } + } +} + +void saa_fread (struct SAA *s, long posn, void *data, long len) +{ + struct SAA *p; + long pos; + char *cdata = data; + + if (!s->rptr || posn < s->rptr->start) + saa_rewind (s); + p = s->rptr; + while (posn >= p->start + p->posn) { + p = p->next; + if (!p) + return; /* what else can we do?! */ + } + + pos = posn - p->start; + while (len) { + long l = p->posn - pos; + if (l > len) + l = len; + memcpy (cdata, p->data+pos, l); + len -= l; + cdata += l; + p = p->next; + if (!p) + return; + pos = 0L; + } + s->rptr = p; +} + +void saa_fwrite (struct SAA *s, long posn, void *data, long len) +{ + struct SAA *p; + long pos; + char *cdata = data; + + if (!s->rptr || posn < s->rptr->start) + saa_rewind (s); + p = s->rptr; + while (posn >= p->start + p->posn) { + p = p->next; + if (!p) + return; /* what else can we do?! 
*/ + } + + pos = posn - p->start; + while (len) { + long l = p->posn - pos; + if (l > len) + l = len; + memcpy (p->data+pos, cdata, l); + len -= l; + cdata += l; + p = p->next; + if (!p) + return; + pos = 0L; + } + s->rptr = p; +} + +void saa_fpwrite (struct SAA *s, FILE *fp) +{ + char *data; + long len; + + saa_rewind (s); + while ( (data = saa_rbytes (s, &len)) ) + fwrite (data, 1, len, fp); +} + +/* + * Register, instruction, condition-code and prefix keywords used + * by the scanner. + */ +#include "names.c" +static const char *special_names[] = { + "byte", "dword", "far", "long", "near", "nosplit", "qword", + "short", "strict", "to", "tword", "word" +}; +static const char *prefix_names[] = { + "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne", + "repnz", "repz", "times" +}; + + +/* + * Standard scanner routine used by parser.c and some output + * formats. It keeps a succession of temporary-storage strings in + * stdscan_tempstorage, which can be cleared using stdscan_reset. 
+ */ +static char **stdscan_tempstorage = NULL; +static int stdscan_tempsize = 0, stdscan_templen = 0; +#define STDSCAN_TEMP_DELTA 256 + +static void stdscan_pop(void) +{ + nasm_free (stdscan_tempstorage[--stdscan_templen]); +} + +void stdscan_reset(void) +{ + while (stdscan_templen > 0) + stdscan_pop(); +} + +/* + * Unimportant cleanup is done to avoid confusing people who are trying + * to debug real memory leaks + */ +void nasmlib_cleanup (void) +{ + stdscan_reset(); + nasm_free (stdscan_tempstorage); +} + +static char *stdscan_copy(char *p, int len) +{ + char *text; + + text = nasm_malloc(len+1); + strncpy (text, p, len); + text[len] = '\0'; + + if (stdscan_templen >= stdscan_tempsize) { + stdscan_tempsize += STDSCAN_TEMP_DELTA; + stdscan_tempstorage = nasm_realloc(stdscan_tempstorage, + stdscan_tempsize*sizeof(char *)); + } + stdscan_tempstorage[stdscan_templen++] = text; + + return text; +} + +char *stdscan_bufptr = NULL; +int stdscan (void *private_data, struct tokenval *tv) +{ + char ourcopy[MAX_KEYWORD+1], *r, *s; + + (void) private_data; /* Don't warn that this parameter is unused */ + + while (isspace(*stdscan_bufptr)) stdscan_bufptr++; + if (!*stdscan_bufptr) + return tv->t_type = 0; + + /* we have a token; either an id, a number or a char */ + if (isidstart(*stdscan_bufptr) || + (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) { + /* now we've got an identifier */ + int i; + int is_sym = FALSE; + + if (*stdscan_bufptr == '$') { + is_sym = TRUE; + stdscan_bufptr++; + } + + r = stdscan_bufptr++; + while (isidchar(*stdscan_bufptr)) stdscan_bufptr++; + tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r); + + if (is_sym || stdscan_bufptr-r > MAX_KEYWORD) + return tv->t_type = TOKEN_ID;/* bypass all other checks */ + + for (s=tv->t_charptr, r=ourcopy; *s; s++) + *r++ = tolower (*s); + *r = '\0'; + /* right, so we have an identifier sitting in temp storage. now, + * is it actually a register or instruction name, or what? 
*/ + if ((tv->t_integer=bsi(ourcopy, reg_names, + elements(reg_names)))>=0) { + tv->t_integer += EXPR_REG_START; + return tv->t_type = TOKEN_REG; + } else if ((tv->t_integer=bsi(ourcopy, insn_names, + elements(insn_names)))>=0) { + return tv->t_type = TOKEN_INSN; + } + for (i=0; it_integer = ico[i]; + if ((tv->t_inttwo=bsi(p, conditions, + elements(conditions)))>=0) + return tv->t_type = TOKEN_INSN; + } + if ((tv->t_integer=bsi(ourcopy, prefix_names, + elements(prefix_names)))>=0) { + tv->t_integer += PREFIX_ENUM_START; + return tv->t_type = TOKEN_PREFIX; + } + if ((tv->t_integer=bsi(ourcopy, special_names, + elements(special_names)))>=0) + return tv->t_type = TOKEN_SPECIAL; + if (!nasm_stricmp(ourcopy, "seg")) + return tv->t_type = TOKEN_SEG; + if (!nasm_stricmp(ourcopy, "wrt")) + return tv->t_type = TOKEN_WRT; + return tv->t_type = TOKEN_ID; + } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) { + /* + * It's a $ sign with no following hex number; this must + * mean it's a Here token ($), evaluating to the current + * assembly location, or a Base token ($$), evaluating to + * the base of the current segment. 
+ */ + stdscan_bufptr++; + if (*stdscan_bufptr == '$') { + stdscan_bufptr++; + return tv->t_type = TOKEN_BASE; + } + return tv->t_type = TOKEN_HERE; + } else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */ + int rn_error; + + r = stdscan_bufptr++; + while (isnumchar(*stdscan_bufptr)) + stdscan_bufptr++; + + if (*stdscan_bufptr == '.') { + /* + * a floating point constant + */ + stdscan_bufptr++; + while (isnumchar(*stdscan_bufptr) || + ((stdscan_bufptr[-1] == 'e' || stdscan_bufptr[-1] == 'E') + && (*stdscan_bufptr == '-' || *stdscan_bufptr == '+')) ) + { + stdscan_bufptr++; + } + tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r); + return tv->t_type = TOKEN_FLOAT; + } + r = stdscan_copy(r, stdscan_bufptr - r); + tv->t_integer = readnum(r, &rn_error); + stdscan_pop(); + if (rn_error) + return tv->t_type = TOKEN_ERRNUM;/* some malformation occurred */ + tv->t_charptr = NULL; + return tv->t_type = TOKEN_NUM; + } else if (*stdscan_bufptr == '\'' || + *stdscan_bufptr == '"') {/* a char constant */ + char quote = *stdscan_bufptr++, *r; + int rn_warn; + r = tv->t_charptr = stdscan_bufptr; + while (*stdscan_bufptr && *stdscan_bufptr != quote) stdscan_bufptr++; + tv->t_inttwo = stdscan_bufptr - r; /* store full version */ + if (!*stdscan_bufptr) + return tv->t_type = TOKEN_ERRNUM; /* unmatched quotes */ + stdscan_bufptr++; /* skip over final quote */ + tv->t_integer = readstrnum(r, tv->t_inttwo, &rn_warn); + /* FIXME: rn_warn is not checked! 
*/ + return tv->t_type = TOKEN_NUM; + } else if (*stdscan_bufptr == ';') { /* a comment has happened - stay */ + return tv->t_type = 0; + } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '>') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_SHR; + } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '<') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_SHL; + } else if (stdscan_bufptr[0] == '/' && stdscan_bufptr[1] == '/') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_SDIV; + } else if (stdscan_bufptr[0] == '%' && stdscan_bufptr[1] == '%') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_SMOD; + } else if (stdscan_bufptr[0] == '=' && stdscan_bufptr[1] == '=') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_EQ; + } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '>') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_NE; + } else if (stdscan_bufptr[0] == '!' && stdscan_bufptr[1] == '=') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_NE; + } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '=') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_LE; + } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '=') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_GE; + } else if (stdscan_bufptr[0] == '&' && stdscan_bufptr[1] == '&') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_DBL_AND; + } else if (stdscan_bufptr[0] == '^' && stdscan_bufptr[1] == '^') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_DBL_XOR; + } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_DBL_OR; + } else /* just an ordinary char */ + return tv->t_type = (unsigned char) (*stdscan_bufptr++); +} + +/* + * Return TRUE if the argument is a simple scalar. (Or a far- + * absolute, which counts.) 
+ */
+int is_simple (expr *vect)
+{
+    /* ignore leading terms whose value is zero */
+    for (; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    if (vect->type == 0)
+        return 1;               /* no non-zero term at all */
+    if (vect->type != EXPR_SIMPLE)
+        return 0;               /* first live term is not the scalar */
+
+    /* step past the scalar and any zero-valued terms behind it */
+    for (vect++; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    /* whatever follows must sit at or above EXPR_SEGBASE+SEG_ABS */
+    if (vect->type == 0 || vect->type >= EXPR_SEGBASE+SEG_ABS)
+        return 1;
+    return 0;
+}
+
+/*
+ * Return TRUE if the argument is a simple scalar and nothing more:
+ * in contrast to is_simple(), a far-absolute is _NOT_ accepted.
+ */
+int is_really_simple (expr *vect)
+{
+    /* ignore leading terms whose value is zero */
+    for (; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    if (vect->type == 0)
+        return 1;               /* no non-zero term at all */
+    if (vect->type != EXPR_SIMPLE)
+        return 0;               /* first live term is not the scalar */
+
+    /* step past the scalar and any zero-valued terms behind it */
+    for (vect++; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    /* any further live term disqualifies the vector */
+    return (vect->type == 0) ? 1 : 0;
+}
+
+/*
+ * Return TRUE if the argument is relocatable, i.e. it is built from
+ * a simple scalar, possibly a WRT term, and no more than one
+ * segment-base term.
+ */
+int is_reloc (expr *vect)
+{
+    /* leading value-0 terms carry no information */
+    for (; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    if (vect->type == 0)
+        return 1;               /* nothing but value-0 terms: TRUE */
+    if (vect->type < EXPR_SIMPLE)
+        return 0;               /* a register is present: FALSE */
+
+    if (vect->type == EXPR_SIMPLE) {
+        /* consume the pure number term... */
+        for (vect++; vect->type != 0 && vect->value == 0; vect++)
+            ;
+        if (vect->type == 0)
+            return 1;           /* ...which was all there is */
+    }
+    if (vect->type == EXPR_WRT) {
+        /* consume the WRT term... */
+        for (vect++; vect->type != 0 && vect->value == 0; vect++)
+            ;
+        if (vect->type == 0)
+            return 1;           /* ...which was all there is */
+    }
+
+    /* a segment base may only appear with multiplier 0 or 1 */
+    if (!(vect->value == 0 || vect->value == 1))
+        return 0;
+
+    /* consume exactly _one_ segment-base term; more means FALSE */
+    for (vect++; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    return (vect->type == 0) ? 1 : 0;
+}
+
+/*
+ * Return TRUE if the argument contains an `unknown' part.
+ */
+int is_unknown(expr *vect)
+{
+    /* advance to the first term whose type is not below EXPR_UNKNOWN */
+    for (; vect->type != 0 && vect->type < EXPR_UNKNOWN; vect++)
+        ;
+    return (vect->type == EXPR_UNKNOWN);
+}
+
+/*
+ * Return TRUE if the first term with a non-zero value is the
+ * `unknown' term, i.e. the argument holds nothing else.
+ */
+int is_just_unknown(expr *vect)
+{
+    for (; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    return (vect->type == EXPR_UNKNOWN);
+}
+
+/*
+ * Return the scalar part of a relocatable vector; simple scalar
+ * vectors count as relocatable too. The result is 0 when the first
+ * non-zero term is not an EXPR_SIMPLE one.
+ */
+long reloc_value (expr *vect)
+{
+    for (; vect->type != 0 && vect->value == 0; vect++)
+        ;
+    if (vect->type != 0 && vect->type == EXPR_SIMPLE)
+        return vect->value;
+    return 0;
+}
+
+/*
+ * Return the segment number of a relocatable vector, or NO_SEG
+ * when the vector is a simple scalar.
+ */
+long reloc_seg (expr *vect)
+{
+    /* WRT terms and zero-valued terms are irrelevant here */
+    for (; vect->type != 0 && (vect->type == EXPR_WRT || vect->value == 0);
+         vect++)
+        ;
+    if (vect->type == EXPR_SIMPLE) {
+        /* step over the scalar, then skip the same kinds of term again */
+        for (vect++;
+             vect->type != 0 && (vect->type == EXPR_WRT || vect->value == 0);
+             vect++)
+            ;
+    }
+    if (vect->type == 0)
+        return NO_SEG;
+    return vect->type - EXPR_SEGBASE;
+}
+
+/*
+ * Return the WRT segment number of a relocatable vector; NO_SEG
+ * is returned when the vector has no WRT part.
+ */
+long reloc_wrt (expr *vect)
+{
+    for (; vect->type != 0 && vect->type < EXPR_WRT; vect++)
+        ;
+    if (vect->type != EXPR_WRT)
+        return NO_SEG;
+    return vect->value;
+}
+
+/*
+ * Binary search.
+ */
+int bsi (char *string, const char **array, int size)
+{
+    /* the match, if any, always lies strictly between lo and hi;
+     * this relies on `array' being in strcmp() order */
+    int lo = -1, hi = size;
+
+    while (hi - lo > 1) {
+        int mid = (lo+hi)/2;
+        int cmp = strcmp(string, array[mid]);
+        if (cmp == 0)
+            return mid;                /* found */
+        if (cmp < 0)
+            hi = mid;                  /* carry on in the lower half */
+        else
+            lo = mid;                  /* carry on in the upper half */
+    }
+    return -1;                         /* not present */
+}
+
+/* Current source position, as last set through src_set_*(). */
+static char *file_name = NULL;
+static long line_number = 0;
+
+/* Install a new current file name and hand back the previous one. */
+char *src_set_fname(char *newname)
+{
+    char *previous = file_name;
+
+    file_name = newname;
+    return previous;
+}
+
+/* Install a new current line number and hand back the previous one. */
+long src_set_linnum(long newline)
+{
+    long previous = line_number;
+
+    line_number = newline;
+    return previous;
+}
+
+/* Report the current line number. */
+long src_get_linnum(void)
+{
+    return line_number;
+}
+
+/*
+ * Bring the caller's private copy of the source position up to date.
+ * Returns -2 when the file name had to be replaced (the old *xname is
+ * freed and a fresh copy stored), the line difference (new-old) when
+ * only the line moved, and 0 when nothing changed.
+ */
+int src_get(long *xline, char **xname)
+{
+    long delta;
+
+    if (file_name == NULL || *xname == NULL ||
+        strcmp(*xname, file_name) != 0) {
+        nasm_free(*xname);
+        *xname = (file_name != NULL) ? nasm_strdup(file_name) : NULL;
+        *xline = line_number;
+        return -2;
+    }
+
+    delta = line_number - *xline;
+    if (delta == 0)
+        return 0;
+    *xline = line_number;
+    return delta;
+}
+
+/*
+ * Make sure *str is enclosed in quotes. A string that already starts
+ * and ends with the same quote character is left alone; otherwise
+ * *str is freed and replaced by a newly allocated quoted copy, using
+ * single quotes only if the text contains a double quote.
+ */
+void nasm_quote(char **str)
+{
+    char *text = *str;
+    int len = strlen(text);
+    char first = text[0];
+    char quote;
+    char *quoted;
+
+    if (len > 1 && text[len-1] == first && (first == '"' || first == '\''))
+        return;
+
+    quote = strchr(text, '"') ? '\'' : '"';
+
+    quoted = nasm_malloc(len+3);       /* two quotes plus terminator */
+    quoted[0] = quote;
+    strcpy(quoted+1, text);
+    quoted[len+1] = quote;
+    quoted[len+2] = 0;
+
+    nasm_free(text);
+    *str = quoted;
+}
+
+/*
+ * Return a newly allocated string holding `one' followed by `two'.
+ */
+char *nasm_strcat(char *one, char *two)
+{
+    int first_len = strlen(one);
+    char *joined = nasm_malloc(first_len+strlen(two)+1);
+
+    strcpy(joined, one);
+    strcpy(joined+first_len, two);
+    return joined;
+}
+
+/* The null debug format: every hook is an empty function. */
+void null_debug_init(struct ofmt *of, void *id, FILE *fp, efunc error ) {}
+void null_debug_linenum(const char *filename, long linenumber, long segto) {}
+void null_debug_deflabel(char *name, long segment, long offset, int is_global, char *special) {}
+void null_debug_routine(const char *directive, const char *params) {}
+void null_debug_typevalue(long type) {}
+void null_debug_output(int type, void *param) {}
+void null_debug_cleanup(void){} + +struct dfmt null_debug_form = { + "Null debug format", + "null", + null_debug_init, + null_debug_linenum, + null_debug_deflabel, + null_debug_routine, + null_debug_typevalue, + null_debug_output, + null_debug_cleanup +}; + +struct dfmt *null_debug_arr[2] = { &null_debug_form, NULL }; diff --git a/modules/preprocs/nasm/nasmlib.h b/modules/preprocs/nasm/nasmlib.h new file mode 100644 index 00000000..54964539 --- /dev/null +++ b/modules/preprocs/nasm/nasmlib.h @@ -0,0 +1,258 @@ +/* nasmlib.h header file for nasmlib.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef NASM_NASMLIB_H +#define NASM_NASMLIB_H + +/* + * If this is defined, the wrappers around malloc et al will + * transform into logging variants, which will cause NASM to create + * a file called `malloc.log' when run, and spew details of all its + * memory management into that. That can then be analysed to detect + * memory leaks and potentially other problems too. + */ +/* #define LOGALLOC */ + +/* + * Wrappers around malloc, realloc and free. nasm_malloc will + * fatal-error and die rather than return NULL; nasm_realloc will + * do likewise, and will also guarantee to work right on being + * passed a NULL pointer; nasm_free will do nothing if it is passed + * a NULL pointer. 
+ */ +#ifdef NASM_NASM_H /* need efunc defined for this */ +void nasm_set_malloc_error (efunc); +#ifndef LOGALLOC +void *nasm_malloc (size_t); +void *nasm_realloc (void *, size_t); +void nasm_free (void *); +char *nasm_strdup (const char *); +char *nasm_strndup (char *, size_t); +#else +void *nasm_malloc_log (char *, int, size_t); +void *nasm_realloc_log (char *, int, void *, size_t); +void nasm_free_log (char *, int, void *); +char *nasm_strdup_log (char *, int, const char *); +char *nasm_strndup_log (char *, int, char *, size_t); +#define nasm_malloc(x) nasm_malloc_log(__FILE__,__LINE__,x) +#define nasm_realloc(x,y) nasm_realloc_log(__FILE__,__LINE__,x,y) +#define nasm_free(x) nasm_free_log(__FILE__,__LINE__,x) +#define nasm_strdup(x) nasm_strdup_log(__FILE__,__LINE__,x) +#define nasm_strndup(x,y) nasm_strndup_log(__FILE__,__LINE__,x,y) +#endif +#endif + +/* + * ANSI doesn't guarantee the presence of `stricmp' or + * `strcasecmp'. + */ +#if defined(stricmp) || defined(strcasecmp) +#if defined(stricmp) +#define nasm_stricmp stricmp +#else +#define nasm_stricmp strcasecmp +#endif +#else +int nasm_stricmp (const char *, const char *); +#endif + +#if defined(strnicmp) || defined(strncasecmp) +#if defined(strnicmp) +#define nasm_strnicmp strnicmp +#else +#define nasm_strnicmp strncasecmp +#endif +#else +int nasm_strnicmp (const char *, const char *, int); +#endif + +/* + * Convert a string into a number, using NASM number rules. Sets + * `*error' to TRUE if an error occurs, and FALSE otherwise. + */ +long readnum(char *str, int *error); + +/* + * Convert a character constant into a number. Sets + * `*warn' to TRUE if an overflow occurs, and FALSE otherwise. + * str points to and length covers the middle of the string, + * without the quotes. + */ +long readstrnum(char *str, int length, int *warn); + +/* + * seg_init: Initialise the segment-number allocator. + * seg_alloc: allocate a hitherto unused segment number. 
+ */ +void seg_init(void); +long seg_alloc(void); + +/* + * many output formats will be able to make use of this: a standard + * function to add an extension to the name of the input file + */ +#ifdef NASM_NASM_H +void standard_extension (char *inname, char *outname, char *extension, + efunc error); +#endif + +/* + * some handy macros that will probably be of use in more than one + * output format: convert integers into little-endian byte packed + * format in memory + */ + +#define WRITELONG(p,v) \ + do { \ + *(p)++ = (v) & 0xFF; \ + *(p)++ = ((v) >> 8) & 0xFF; \ + *(p)++ = ((v) >> 16) & 0xFF; \ + *(p)++ = ((v) >> 24) & 0xFF; \ + } while (0) + +#define WRITESHORT(p,v) \ + do { \ + *(p)++ = (v) & 0xFF; \ + *(p)++ = ((v) >> 8) & 0xFF; \ + } while (0) + +/* + * and routines to do the same thing to a file + */ +void fwriteshort (int data, FILE *fp); +void fwritelong (long data, FILE *fp); + +/* + * Routines to manage a dynamic random access array of longs which + * may grow in size to be more than the largest single malloc'able + * chunk. + */ + +#define RAA_BLKSIZE 4096 /* this many longs allocated at once */ +#define RAA_LAYERSIZE 1024 /* this many _pointers_ allocated */ + +typedef struct RAA RAA; +typedef union RAA_UNION RAA_UNION; +typedef struct RAA_LEAF RAA_LEAF; +typedef struct RAA_BRANCH RAA_BRANCH; + +struct RAA { + /* + * Number of layers below this one to get to the real data. 0 + * means this structure is a leaf, holding RAA_BLKSIZE real + * data items; 1 and above mean it's a branch, holding + * RAA_LAYERSIZE pointers to the next level branch or leaf + * structures. + */ + int layers; + /* + * Number of real data items spanned by one position in the + * `data' array at this level. This number is 1, trivially, for + * a leaf (level 0): for a level 1 branch it should be + * RAA_BLKSIZE, and for a level 2 branch it's + * RAA_LAYERSIZE*RAA_BLKSIZE. 
+ */ + long stepsize; + union RAA_UNION { + struct RAA_LEAF { + long data[RAA_BLKSIZE]; + } l; + struct RAA_BRANCH { + struct RAA *data[RAA_LAYERSIZE]; + } b; + } u; +}; + + +struct RAA *raa_init (void); +void raa_free (struct RAA *); +long raa_read (struct RAA *, long); +struct RAA *raa_write (struct RAA *r, long posn, long value); + +/* + * Routines to manage a dynamic sequential-access array, under the + * same restriction on maximum mallocable block. This array may be + * written to in two ways: a contiguous chunk can be reserved of a + * given size, and a pointer returned, or single-byte data may be + * written. The array can also be read back in the same two ways: + * as a series of big byte-data blocks or as a list of structures + * of a given size. + */ + +struct SAA { + /* + * members `end' and `elem_len' are only valid in first link in + * list; `rptr' and `rpos' are used for reading + */ + struct SAA *next, *end, *rptr; + long elem_len, length, posn, start, rpos; + char *data; +}; + +struct SAA *saa_init (long elem_len); /* 1 == byte */ +void saa_free (struct SAA *); +void *saa_wstruct (struct SAA *); /* return a structure of elem_len */ +void saa_wbytes (struct SAA *, const void *, long); /* write arbitrary bytes */ +void saa_rewind (struct SAA *); /* for reading from beginning */ +void *saa_rstruct (struct SAA *); /* return NULL on EOA */ +void *saa_rbytes (struct SAA *, long *); /* return 0 on EOA */ +void saa_rnbytes (struct SAA *, void *, long); /* read a given no. of bytes */ +void saa_fread (struct SAA *s, long posn, void *p, long len); /* fixup */ +void saa_fwrite (struct SAA *s, long posn, void *p, long len); /* fixup */ +void saa_fpwrite (struct SAA *, FILE *); + +#ifdef NASM_NASM_H +/* + * Standard scanner. + */ +extern char *stdscan_bufptr; +void stdscan_reset(void); +int stdscan (void *private_data, struct tokenval *tv); +#endif + +#ifdef NASM_NASM_H +/* + * Library routines to manipulate expression data types. 
+ */ +int is_reloc(expr *); +int is_simple(expr *); +int is_really_simple (expr *); +int is_unknown(expr *); +int is_just_unknown(expr *); +long reloc_value(expr *); +long reloc_seg(expr *); +long reloc_wrt(expr *); +#endif + +/* + * Binary search routine. Returns index into `array' of an entry + * matching `string', or <0 if no match. `array' is taken to + * contain `size' elements. + */ +int bsi (char *string, const char **array, int size); + + +char *src_set_fname(char *newname); +long src_set_linnum(long newline); +long src_get_linnum(void); +/* + * src_get may be used if you simply want to know the source file and line. + * It is also used if you maintain private status about the source location. + * It returns 0 if the information was the same as the last time you + * checked, -2 if the name changed and (new-old) if just the line changed. + */ +int src_get(long *xline, char **xname); + +void nasm_quote(char **str); +char *nasm_strcat(char *one, char *two); +void nasmlib_cleanup(void); + +void null_debug_routine(const char *directive, const char *params); +extern struct dfmt null_debug_form; +extern struct dfmt *null_debug_arr[2]; +#endif diff --git a/modules/preprocs/nasm/standard.mac b/modules/preprocs/nasm/standard.mac new file mode 100644 index 00000000..bbbf90d8 --- /dev/null +++ b/modules/preprocs/nasm/standard.mac @@ -0,0 +1,110 @@ +; Standard macro set for NASM -*- nasm -*- + +; Macros to make NASM ignore some TASM directives before the first include +; directive. + + %idefine IDEAL + %idefine JUMPS + %idefine P386 + %idefine P486 + %idefine P586 + %idefine END + +; This is a magic token which indicates the end of the TASM macros +*END*TASM*MACROS* + +; Note that although some user-level forms of directives are defined +; here, not all of them are: the user-level form of a format-specific +; directive should be defined in the module for that directive. 
+ +; These two need to be defined, though the actual definitions will +; be constantly updated during preprocessing. +%define __FILE__ +%define __LINE__ + +%define __SECT__ ; it ought to be defined, even if as nothing + +%imacro section 1+.nolist +%define __SECT__ [section %1] + __SECT__ +%endmacro +%imacro segment 1+.nolist +%define __SECT__ [segment %1] + __SECT__ +%endmacro + +%imacro absolute 1+.nolist +%define __SECT__ [absolute %1] + __SECT__ +%endmacro + +%imacro struc 1.nolist +%push struc +%define %$strucname %1 +[absolute 0] +%$strucname: ; allow definition of `.member' to work sanely +%endmacro +%imacro endstruc 0.nolist +%{$strucname}_size: +%pop +__SECT__ +%endmacro + +%imacro istruc 1.nolist +%push istruc +%define %$strucname %1 +%$strucstart: +%endmacro +%imacro at 1-2+.nolist + times %1-($-%$strucstart) db 0 + %2 +%endmacro +%imacro iend 0.nolist + times %{$strucname}_size-($-%$strucstart) db 0 +%pop +%endmacro + +%imacro align 1-2+.nolist nop + times ($$-$) & ((%1)-1) %2 +%endmacro +%imacro alignb 1-2+.nolist resb 1 + times ($$-$) & ((%1)-1) %2 +%endmacro + +%imacro extern 1-*.nolist +%rep %0 +[extern %1] +%rotate 1 +%endrep +%endmacro + +%imacro bits 1+.nolist +[bits %1] +%endmacro + +%imacro use16 0.nolist +[bits 16] +%endmacro +%imacro use32 0.nolist +[bits 32] +%endmacro + +%imacro global 1-*.nolist +%rep %0 +[global %1] +%rotate 1 +%endrep +%endmacro + +%imacro common 1-*.nolist +%rep %0 +[common %1] +%rotate 1 +%endrep +%endmacro + +%imacro cpu 1+.nolist +[cpu %1] +%endmacro + + diff --git a/src/preprocs/nasm/macros.pl b/src/preprocs/nasm/macros.pl new file mode 100644 index 00000000..0934d174 --- /dev/null +++ b/src/preprocs/nasm/macros.pl @@ -0,0 +1,48 @@ +#!/usr/bin/perl -w +# +# macros.pl produce macros.c from standard.mac +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. 
The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. + +use strict; + +my $fname; +my $line = 0; +my $index = 0; +my $tasm_count; + +undef $tasm_count; + +open(OUTPUT,">macros.c") or die "unable to open macros.c\n"; + +print OUTPUT "/* This file auto-generated from standard.mac by macros.pl" . +" - don't edit it */\n\n#include \n\nstatic const char *stdmac[] = {\n"; + +foreach $fname ( @ARGV ) { + open(INPUT,$fname) or die "unable to open $fname\n"; + while () { + $line++; + chomp; + if (m/^\s*\*END\*TASM\*MACROS\*\s*$/) { + $tasm_count = $index; + } elsif (m/^\s*((\s*([^\"\';\s]+|\"[^\"]*\"|\'[^\']*\'))*)\s*(;.*)?$/) { + $_ = $1; + s/\\/\\\\/g; + s/"/\\"/g; + if (length > 0) { + print OUTPUT " \"$_\",\n"; + $index++; + } + } else { + die "$fname:$line: error unterminated quote"; + } + } + close(INPUT); +} +print OUTPUT " NULL\n};\n"; +$tasm_count = $index unless ( defined($tasm_count) ); +print OUTPUT "#define TASM_MACRO_COUNT $tasm_count\n"; +close(OUTPUT); diff --git a/src/preprocs/nasm/nasm-eval.c b/src/preprocs/nasm/nasm-eval.c new file mode 100644 index 00000000..28aca642 --- /dev/null +++ b/src/preprocs/nasm/nasm-eval.c @@ -0,0 +1,825 @@ +/* eval.c expression evaluator for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ * + * initial version 27/iii/95 by Simon Tatham + */ + +#include +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" +#include "eval.h" +#include "labels.h" + +#define TEMPEXPRS_DELTA 128 +#define TEMPEXPR_DELTA 8 + +static scanner scan; /* Address of scanner routine */ +static efunc error; /* Address of error reporting routine */ +static lfunc labelfunc; /* Address of label routine */ + +static struct ofmt *outfmt; /* Structure of addresses of output routines */ + +static expr **tempexprs = NULL; +static int ntempexprs; +static int tempexprs_size = 0; + +static expr *tempexpr; +static int ntempexpr; +static int tempexpr_size; + +static struct tokenval *tokval; /* The current token */ +static int i; /* The t_type of tokval */ + +static void *scpriv; +static loc_t *location; /* Pointer to current line's segment,offset */ +static int *opflags; + +static struct eval_hints *hint; + +extern int in_abs_seg; /* ABSOLUTE segment flag */ +extern long abs_seg; /* ABSOLUTE segment */ +extern long abs_offset; /* ABSOLUTE segment offset */ + +/* + * Unimportant cleanup is done to avoid confusing people who are trying + * to debug real memory leaks + */ +void eval_cleanup(void) +{ + while (ntempexprs) + nasm_free (tempexprs[--ntempexprs]); + nasm_free (tempexprs); +} + +/* + * Construct a temporary expression. 
+ */ +static void begintemp(void) +{ + tempexpr = NULL; + tempexpr_size = ntempexpr = 0; +} + +static void addtotemp(long type, long value) +{ + while (ntempexpr >= tempexpr_size) { + tempexpr_size += TEMPEXPR_DELTA; + tempexpr = nasm_realloc(tempexpr, + tempexpr_size*sizeof(*tempexpr)); + } + tempexpr[ntempexpr].type = type; + tempexpr[ntempexpr++].value = value; +} + +static expr *finishtemp(void) +{ + addtotemp (0L, 0L); /* terminate */ + while (ntempexprs >= tempexprs_size) { + tempexprs_size += TEMPEXPRS_DELTA; + tempexprs = nasm_realloc(tempexprs, + tempexprs_size*sizeof(*tempexprs)); + } + return tempexprs[ntempexprs++] = tempexpr; +} + +/* + * Add two vector datatypes. We have some bizarre behaviour on far- + * absolute segment types: we preserve them during addition _only_ + * if one of the segments is a truly pure scalar. + */ +static expr *add_vectors(expr *p, expr *q) +{ + int preserve; + + preserve = is_really_simple(p) || is_really_simple(q); + + begintemp(); + + while (p->type && q->type && + p->type < EXPR_SEGBASE+SEG_ABS && + q->type < EXPR_SEGBASE+SEG_ABS) + { + int lasttype; + + if (p->type > q->type) { + addtotemp(q->type, q->value); + lasttype = q++->type; + } else if (p->type < q->type) { + addtotemp(p->type, p->value); + lasttype = p++->type; + } else { /* *p and *q have same type */ + long sum = p->value + q->value; + if (sum) + addtotemp(p->type, sum); + lasttype = p->type; + p++, q++; + } + if (lasttype == EXPR_UNKNOWN) { + return finishtemp(); + } + } + while (p->type && + (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) + { + addtotemp(p->type, p->value); + p++; + } + while (q->type && + (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) + { + addtotemp(q->type, q->value); + q++; + } + + return finishtemp(); +} + +/* + * Multiply a vector by a scalar. Strip far-absolute segment part + * if present. + * + * Explicit treatment of UNKNOWN is not required in this routine, + * since it will silently do the Right Thing anyway. 
+ * + * If `affect_hints' is set, we also change the hint type to + * NOTBASE if a MAKEBASE hint points at a register being + * multiplied. This allows [eax*1+ebx] to hint EBX rather than EAX + * as the base register. + */ +static expr *scalar_mult(expr *vect, long scalar, int affect_hints) +{ + expr *p = vect; + + while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) { + p->value = scalar * (p->value); + if (hint && hint->type == EAH_MAKEBASE && + p->type == hint->base && affect_hints) + hint->type = EAH_NOTBASE; + p++; + } + p->type = 0; + + return vect; +} + +static expr *scalarvect (long scalar) +{ + begintemp(); + addtotemp(EXPR_SIMPLE, scalar); + return finishtemp(); +} + +static expr *unknown_expr (void) +{ + begintemp(); + addtotemp(EXPR_UNKNOWN, 1L); + return finishtemp(); +} + +/* + * The SEG operator: calculate the segment part of a relocatable + * value. Return NULL, as usual, if an error occurs. Report the + * error too. + */ +static expr *segment_part (expr *e) +{ + long seg; + + if (is_unknown(e)) + return unknown_expr(); + + if (!is_reloc(e)) { + error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); + return NULL; + } + + seg = reloc_seg(e); + if (seg == NO_SEG) { + error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); + return NULL; + } else if (seg & SEG_ABS) { + return scalarvect(seg & ~SEG_ABS); + } else if (seg & 1) { + error(ERR_NONFATAL, "SEG applied to something which" + " is already a segment base"); + return NULL; + } + else { + long base = outfmt->segbase(seg+1); + + begintemp(); + addtotemp((base == NO_SEG ? EXPR_UNKNOWN : EXPR_SEGBASE+base), 1L); + return finishtemp(); + } +} + +/* + * Recursive-descent parser. Called with a single boolean operand, + * which is TRUE if the evaluation is critical (i.e. unresolved + * symbols are an error condition). Must update the global `i' to + * reflect the token after the parsed string. May return NULL. 
+ * + * evaluate() should report its own errors: on return it is assumed + * that if NULL has been returned, the error has already been + * reported. + */ + +/* + * Grammar parsed is: + * + * expr : bexpr [ WRT expr6 ] + * bexpr : rexp0 or expr0 depending on relative-mode setting + * rexp0 : rexp1 [ {||} rexp1...] + * rexp1 : rexp2 [ {^^} rexp2...] + * rexp2 : rexp3 [ {&&} rexp3...] + * rexp3 : expr0 [ {=,==,<>,!=,<,>,<=,>=} expr0 ] + * expr0 : expr1 [ {|} expr1...] + * expr1 : expr2 [ {^} expr2...] + * expr2 : expr3 [ {&} expr3...] + * expr3 : expr4 [ {<<,>>} expr4...] + * expr4 : expr5 [ {+,-} expr5...] + * expr5 : expr6 [ {*,/,%,//,%%} expr6...] + * expr6 : { ~,+,-,SEG } expr6 + * | (bexpr) + * | symbol + * | $ + * | number + */ + +static expr *rexp0(int), *rexp1(int), *rexp2(int), *rexp3(int); + +static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int); +static expr *expr4(int), *expr5(int), *expr6(int); + +static expr *(*bexpr)(int); + +static expr *rexp0(int critical) +{ + expr *e, *f; + + e = rexp1(critical); + if (!e) + return NULL; + + while (i == TOKEN_DBL_OR) + { + i = scan(scpriv, tokval); + f = rexp1(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`|' operator may only be applied to" + " scalar values"); + } + + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect ((long) (reloc_value(e) || reloc_value(f))); + } + return e; +} + +static expr *rexp1(int critical) +{ + expr *e, *f; + + e = rexp2(critical); + if (!e) + return NULL; + + while (i == TOKEN_DBL_XOR) + { + i = scan(scpriv, tokval); + f = rexp2(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`^' operator may only be applied to" + " scalar values"); + } + + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect 
((long) (!reloc_value(e) ^ !reloc_value(f))); + } + return e; +} + +static expr *rexp2(int critical) +{ + expr *e, *f; + + e = rexp3(critical); + if (!e) + return NULL; + while (i == TOKEN_DBL_AND) + { + i = scan(scpriv, tokval); + f = rexp3(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`&' operator may only be applied to" + " scalar values"); + } + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect ((long) (reloc_value(e) && reloc_value(f))); + } + return e; +} + +static expr *rexp3(int critical) +{ + expr *e, *f; + long v; + + e = expr0(critical); + if (!e) + return NULL; + + while (i == TOKEN_EQ || i == TOKEN_LT || i == TOKEN_GT || + i == TOKEN_NE || i == TOKEN_LE || i == TOKEN_GE) + { + int j = i; + i = scan(scpriv, tokval); + f = expr0(critical); + if (!f) + return NULL; + + e = add_vectors (e, scalar_mult(f, -1L, FALSE)); + + switch (j) + { + case TOKEN_EQ: case TOKEN_NE: + if (is_unknown(e)) + v = -1; /* means unknown */ + else if (!is_really_simple(e) || reloc_value(e) != 0) + v = (j == TOKEN_NE); /* unequal, so return TRUE if NE */ + else + v = (j == TOKEN_EQ); /* equal, so return TRUE if EQ */ + break; + default: + if (is_unknown(e)) + v = -1; /* means unknown */ + else if (!is_really_simple(e)) { + error(ERR_NONFATAL, "`%s': operands differ by a non-scalar", + (j == TOKEN_LE ? "<=" : j == TOKEN_LT ? "<" : + j == TOKEN_GE ? 
">=" : ">")); + v = 0; /* must set it to _something_ */ + } else { + int vv = reloc_value(e); + if (vv == 0) + v = (j == TOKEN_LE || j == TOKEN_GE); + else if (vv > 0) + v = (j == TOKEN_GE || j == TOKEN_GT); + else /* vv < 0 */ + v = (j == TOKEN_LE || j == TOKEN_LT); + } + break; + } + + if (v == -1) + e = unknown_expr(); + else + e = scalarvect(v); + } + return e; +} + +static expr *expr0(int critical) +{ + expr *e, *f; + + e = expr1(critical); + if (!e) + return NULL; + + while (i == '|') + { + i = scan(scpriv, tokval); + f = expr1(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`|' operator may only be applied to" + " scalar values"); + } + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (reloc_value(e) | reloc_value(f)); + } + return e; +} + +static expr *expr1(int critical) +{ + expr *e, *f; + + e = expr2(critical); + if (!e) + return NULL; + + while (i == '^') { + i = scan(scpriv, tokval); + f = expr2(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`^' operator may only be applied to" + " scalar values"); + } + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (reloc_value(e) ^ reloc_value(f)); + } + return e; +} + +static expr *expr2(int critical) +{ + expr *e, *f; + + e = expr3(critical); + if (!e) + return NULL; + + while (i == '&') { + i = scan(scpriv, tokval); + f = expr3(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "`&' operator may only be applied to" + " scalar values"); + } + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (reloc_value(e) & reloc_value(f)); + } + return e; +} + +static expr *expr3(int critical) +{ + 
expr *e, *f; + + e = expr4(critical); + if (!e) + return NULL; + + while (i == TOKEN_SHL || i == TOKEN_SHR) + { + int j = i; + i = scan(scpriv, tokval); + f = expr4(critical); + if (!f) + return NULL; + if (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f))) + { + error(ERR_NONFATAL, "shift operator may only be applied to" + " scalar values"); + } else if (is_just_unknown(e) || is_just_unknown(f)) { + e = unknown_expr(); + } else switch (j) { + case TOKEN_SHL: + e = scalarvect (reloc_value(e) << reloc_value(f)); + break; + case TOKEN_SHR: + e = scalarvect (((unsigned long)reloc_value(e)) >> + reloc_value(f)); + break; + } + } + return e; +} + +static expr *expr4(int critical) +{ + expr *e, *f; + + e = expr5(critical); + if (!e) + return NULL; + while (i == '+' || i == '-') + { + int j = i; + i = scan(scpriv, tokval); + f = expr5(critical); + if (!f) + return NULL; + switch (j) { + case '+': + e = add_vectors (e, f); + break; + case '-': + e = add_vectors (e, scalar_mult(f, -1L, FALSE)); + break; + } + } + return e; +} + +static expr *expr5(int critical) +{ + expr *e, *f; + + e = expr6(critical); + if (!e) + return NULL; + while (i == '*' || i == '/' || i == '%' || + i == TOKEN_SDIV || i == TOKEN_SMOD) + { + int j = i; + i = scan(scpriv, tokval); + f = expr6(critical); + if (!f) + return NULL; + if (j != '*' && (!(is_simple(e) || is_just_unknown(e)) || + !(is_simple(f) || is_just_unknown(f)))) + { + error(ERR_NONFATAL, "division operator may only be applied to" + " scalar values"); + return NULL; + } + if (j != '*' && !is_unknown(f) && reloc_value(f) == 0) { + error(ERR_NONFATAL, "division by zero"); + return NULL; + } + switch (j) { + case '*': + if (is_simple(e)) + e = scalar_mult (f, reloc_value(e), TRUE); + else if (is_simple(f)) + e = scalar_mult (e, reloc_value(f), TRUE); + else if (is_just_unknown(e) && is_just_unknown(f)) + e = unknown_expr(); + else { + error(ERR_NONFATAL, "unable to multiply two " + "non-scalar objects"); + 
return NULL; + } + break; + case '/': + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (((unsigned long)reloc_value(e)) / + ((unsigned long)reloc_value(f))); + break; + case '%': + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (((unsigned long)reloc_value(e)) % + ((unsigned long)reloc_value(f))); + break; + case TOKEN_SDIV: + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (((signed long)reloc_value(e)) / + ((signed long)reloc_value(f))); + break; + case TOKEN_SMOD: + if (is_just_unknown(e) || is_just_unknown(f)) + e = unknown_expr(); + else + e = scalarvect (((signed long)reloc_value(e)) % + ((signed long)reloc_value(f))); + break; + } + } + return e; +} + +static expr *expr6(int critical) +{ + long type; + expr *e; + long label_seg, label_ofs; + + if (i == '-') { + i = scan(scpriv, tokval); + e = expr6(critical); + if (!e) + return NULL; + return scalar_mult (e, -1L, FALSE); + } else if (i == '+') { + i = scan(scpriv, tokval); + return expr6(critical); + } else if (i == '~') { + i = scan(scpriv, tokval); + e = expr6(critical); + if (!e) + return NULL; + if (is_just_unknown(e)) + return unknown_expr(); + else if (!is_simple(e)) { + error(ERR_NONFATAL, "`~' operator may only be applied to" + " scalar values"); + return NULL; + } + return scalarvect(~reloc_value(e)); + } else if (i == TOKEN_SEG) { + i = scan(scpriv, tokval); + e = expr6(critical); + if (!e) + return NULL; + e = segment_part(e); + if (!e) + return NULL; + if (is_unknown(e) && critical) { + error(ERR_NONFATAL, "unable to determine segment base"); + return NULL; + } + return e; + } else if (i == '(') { + i = scan(scpriv, tokval); + e = bexpr(critical); + if (!e) + return NULL; + if (i != ')') { + error(ERR_NONFATAL, "expecting `)'"); + return NULL; + } + i = scan(scpriv, tokval); + return e; + } + else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID || + i == 
TOKEN_HERE || i == TOKEN_BASE) + { + begintemp(); + switch (i) { + case TOKEN_NUM: + addtotemp(EXPR_SIMPLE, tokval->t_integer); + break; + case TOKEN_REG: + addtotemp(tokval->t_integer, 1L); + if (hint && hint->type == EAH_NOHINT) + hint->base = tokval->t_integer, hint->type = EAH_MAKEBASE; + break; + case TOKEN_ID: + case TOKEN_HERE: + case TOKEN_BASE: + /* + * If !location->known, this indicates that no + * symbol, Here or Base references are valid because we + * are in preprocess-only mode. + */ + if (!location->known) { + error(ERR_NONFATAL, + "%s not supported in preprocess-only mode", + (i == TOKEN_ID ? "symbol references" : + i == TOKEN_HERE ? "`$'" : "`$$'")); + addtotemp(EXPR_UNKNOWN, 1L); + break; + } + + type = EXPR_SIMPLE; /* might get overridden by UNKNOWN */ + if (i == TOKEN_BASE) + { + label_seg = in_abs_seg ? abs_seg : location->segment; + label_ofs = 0; + } else if (i == TOKEN_HERE) { + label_seg = in_abs_seg ? abs_seg : location->segment; + label_ofs = in_abs_seg ? abs_offset : location->offset; + } else { + if (!labelfunc(tokval->t_charptr,&label_seg,&label_ofs)) + { + if (critical == 2) { + error (ERR_NONFATAL, "symbol `%s' undefined", + tokval->t_charptr); + return NULL; + } else if (critical == 1) { + error (ERR_NONFATAL, + "symbol `%s' not defined before use", + tokval->t_charptr); + return NULL; + } else { + if (opflags) + *opflags |= 1; + type = EXPR_UNKNOWN; + label_seg = NO_SEG; + label_ofs = 1; + } + } + if (opflags && is_extern (tokval->t_charptr)) + *opflags |= OPFLAG_EXTERN; + } + addtotemp(type, label_ofs); + if (label_seg!=NO_SEG) + addtotemp(EXPR_SEGBASE + label_seg, 1L); + break; + } + i = scan(scpriv, tokval); + return finishtemp(); + } else { + error(ERR_NONFATAL, "expression syntax error"); + return NULL; + } +} + +void eval_global_info (struct ofmt *output, lfunc lookup_label, loc_t *locp) +{ + outfmt = output; + labelfunc = lookup_label; + location = locp; +} + +expr *evaluate (scanner sc, void *scprivate, struct tokenval 
*tv, + int *fwref, int critical, efunc report_error, + struct eval_hints *hints) +{ + expr *e; + expr *f = NULL; + + hint = hints; + if (hint) + hint->type = EAH_NOHINT; + + if (critical & CRITICAL) { + critical &= ~CRITICAL; + bexpr = rexp0; + } else + bexpr = expr0; + + scan = sc; + scpriv = scprivate; + tokval = tv; + error = report_error; + opflags = fwref; + + if (tokval->t_type == TOKEN_INVALID) + i = scan(scpriv, tokval); + else + i = tokval->t_type; + + while (ntempexprs) /* initialise temporary storage */ + nasm_free (tempexprs[--ntempexprs]); + + e = bexpr (critical); + if (!e) + return NULL; + + if (i == TOKEN_WRT) { + i = scan(scpriv, tokval); /* eat the WRT */ + f = expr6 (critical); + if (!f) + return NULL; + } + e = scalar_mult (e, 1L, FALSE); /* strip far-absolute segment part */ + if (f) { + expr *g; + if (is_just_unknown(f)) + g = unknown_expr(); + else { + long value; + begintemp(); + if (!is_reloc(f)) { + error(ERR_NONFATAL, "invalid right-hand operand to WRT"); + return NULL; + } + value = reloc_seg(f); + if (value == NO_SEG) + value = reloc_value(f) | SEG_ABS; + else if (!(value & SEG_ABS) && !(value % 2) && critical) + { + error(ERR_NONFATAL, "invalid right-hand operand to WRT"); + return NULL; + } + addtotemp(EXPR_WRT, value); + g = finishtemp(); + } + e = add_vectors (e, g); + } + return e; +} diff --git a/src/preprocs/nasm/nasm-eval.h b/src/preprocs/nasm/nasm-eval.h new file mode 100644 index 00000000..a933cbfd --- /dev/null +++ b/src/preprocs/nasm/nasm-eval.h @@ -0,0 +1,28 @@ +/* eval.h header file for eval.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef NASM_EVAL_H +#define NASM_EVAL_H + +/* + * Called once to tell the evaluator what output format is + * providing segment-base details, and what function can be used to + * look labels up. 
+ */ +void eval_global_info (struct ofmt *output, lfunc lookup_label, loc_t *locp); + +/* + * The evaluator itself. + */ +expr *evaluate (scanner sc, void *scprivate, struct tokenval *tv, + int *fwref, int critical, efunc report_error, + struct eval_hints *hints); + +void eval_cleanup(void); + +#endif diff --git a/src/preprocs/nasm/nasm-pp.c b/src/preprocs/nasm/nasm-pp.c new file mode 100644 index 00000000..0770812e --- /dev/null +++ b/src/preprocs/nasm/nasm-pp.c @@ -0,0 +1,4459 @@ +/* -*- mode: c; c-file-style: "bsd" -*- */ +/* preproc.c macro preprocessor for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * initial version 18/iii/97 by Simon Tatham + */ + +/* Typical flow of text through preproc + * + * pp_getline gets tokenised lines, either + * + * from a macro expansion + * + * or + * { + * read_line gets raw text from stdmacpos, or predef, or current input file + * tokenise converts to tokens + * } + * + * expand_mmac_params is used to expand %1 etc., unless a macro is being + * defined or a false conditional is being processed + * (%0, %1, %+1, %-1, %%foo + * + * do_directive checks for directives + * + * expand_smacro is used to expand single line macros + * + * expand_mmacro is used to expand multi-line macros + * + * detoken is used to convert the line back to text + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" + +typedef struct SMacro SMacro; +typedef struct MMacro MMacro; +typedef struct Context Context; +typedef struct Token Token; +typedef struct Blocks Blocks; +typedef struct Line Line; +typedef struct Include Include; +typedef struct Cond Cond; +typedef struct IncPath IncPath; + +/* + * Store the definition of a single-line macro. 
+ */ +struct SMacro +{ + SMacro *next; + char *name; + int casesense; + int nparam; + int in_progress; + Token *expansion; +}; + +/* + * Store the definition of a multi-line macro. This is also used to + * store the interiors of `%rep...%endrep' blocks, which are + * effectively self-re-invoking multi-line macros which simply + * don't have a name or bother to appear in the hash tables. %rep + * blocks are signified by having a NULL `name' field. + * + * In a MMacro describing a `%rep' block, the `in_progress' field + * isn't merely boolean, but gives the number of repeats left to + * run. + * + * The `next' field is used for storing MMacros in hash tables; the + * `next_active' field is for stacking them on istk entries. + * + * When a MMacro is being expanded, `params', `iline', `nparam', + * `paramlen', `rotate' and `unique' are local to the invocation. + */ +struct MMacro +{ + MMacro *next; + char *name; + int casesense; + int nparam_min, nparam_max; + int plus; /* is the last parameter greedy? */ + int nolist; /* is this macro listing-inhibited? */ + int in_progress; + Token *dlist; /* All defaults as one list */ + Token **defaults; /* Parameter default pointers */ + int ndefs; /* number of default parameters */ + Line *expansion; + + MMacro *next_active; + MMacro *rep_nest; /* used for nesting %rep */ + Token **params; /* actual parameters */ + Token *iline; /* invocation line */ + int nparam, rotate, *paramlen; + unsigned long unique; + int lineno; /* Current line number on expansion */ +}; + +/* + * The context stack is composed of a linked list of these. + */ +struct Context +{ + Context *next; + SMacro *localmac; + char *name; + unsigned long number; +}; + +/* + * This is the internal form which we break input lines up into. + * Typically stored in linked lists. + * + * Note that `type' serves a double meaning: TOK_SMAC_PARAM is not + * necessarily used as-is, but is intended to denote the number of + * the substituted parameter. 
So in the definition + * + * %define a(x,y) ( (x) & ~(y) ) + * + * the token representing `x' will have its type changed to + * TOK_SMAC_PARAM, but the one representing `y' will be + * TOK_SMAC_PARAM+1. + * + * TOK_INTERNAL_STRING is a dirty hack: it's a single string token + * which doesn't need quotes around it. Used in the pre-include + * mechanism as an alternative to trying to find a sensible type of + * quote to use on the filename we were passed. + */ +struct Token +{ + Token *next; + char *text; + SMacro *mac; /* associated macro for TOK_SMAC_END */ + int type; +}; +enum +{ + TOK_WHITESPACE = 1, TOK_COMMENT, TOK_ID, TOK_PREPROC_ID, TOK_STRING, + TOK_NUMBER, TOK_SMAC_END, TOK_OTHER, TOK_SMAC_PARAM, + TOK_INTERNAL_STRING +}; + +/* + * Multi-line macro definitions are stored as a linked list of + * these, which is essentially a container to allow several linked + * lists of Tokens. + * + * Note that in this module, linked lists are treated as stacks + * wherever possible. For this reason, Lines are _pushed_ on to the + * `expansion' field in MMacro structures, so that the linked list, + * if walked, would give the macro lines in reverse order; this + * means that we can walk the list when expanding a macro, and thus + * push the lines on to the `expansion' field in _istk_ in reverse + * order (so that when popped back off they are in the right + * order). It may seem cockeyed, and it relies on my design having + * an even number of steps in, but it works... + * + * Some of these structures, rather than being actual lines, are + * markers delimiting the end of the expansion of a given macro. + * This is for use in the cycle-tracking and %rep-handling code. + * Such structures have `finishes' non-NULL, and `first' NULL. All + * others have `finishes' NULL, but `first' may still be NULL if + * the line is blank. 
+ */ +struct Line +{ + Line *next; + MMacro *finishes; + Token *first; +}; + +/* + * To handle an arbitrary level of file inclusion, we maintain a + * stack (ie linked list) of these things. + */ +struct Include +{ + Include *next; + FILE *fp; + Cond *conds; + Line *expansion; + char *fname; + int lineno, lineinc; + MMacro *mstk; /* stack of active macros/reps */ +}; + +/* + * Include search path. This is simply a list of strings which get + * prepended, in turn, to the name of an include file, in an + * attempt to find the file if it's not in the current directory. + */ +struct IncPath +{ + IncPath *next; + char *path; +}; + +/* + * Conditional assembly: we maintain a separate stack of these for + * each level of file inclusion. (The only reason we keep the + * stacks separate is to ensure that a stray `%endif' in a file + * included from within the true branch of a `%if' won't terminate + * it and cause confusion: instead, rightly, it'll cause an error.) + */ +struct Cond +{ + Cond *next; + int state; +}; +enum +{ + /* + * These states are for use just after %if or %elif: IF_TRUE + * means the condition has evaluated to truth so we are + * currently emitting, whereas IF_FALSE means we are not + * currently emitting but will start doing so if a %else comes + * up. In these states, all directives are admissible: %elif, + * %else and %endif. (And of course %if.) + */ + COND_IF_TRUE, COND_IF_FALSE, + /* + * These states come up after a %else: ELSE_TRUE means we're + * emitting, and ELSE_FALSE means we're not. In ELSE_* states, + * any %elif or %else will cause an error. + */ + COND_ELSE_TRUE, COND_ELSE_FALSE, + /* + * This state means that we're not emitting now, and also that + * nothing until %endif will be emitted at all. 
It's for use in + * two circumstances: (i) when we've had our moment of emission + * and have now started seeing %elifs, and (ii) when the + * condition construct in question is contained within a + * non-emitting branch of a larger condition construct. + */ + COND_NEVER +}; +#define emitting(x) ( (x) == COND_IF_TRUE || (x) == COND_ELSE_TRUE ) + +/* + * These defines are used as the possible return values for do_directive + */ +#define NO_DIRECTIVE_FOUND 0 +#define DIRECTIVE_FOUND 1 + +/* + * Condition codes. Note that we use c_ prefix not C_ because C_ is + * used in nasm.h for the "real" condition codes. At _this_ level, + * we treat CXZ and ECXZ as condition codes, albeit non-invertible + * ones, so we need a different enum... + */ +static const char *conditions[] = { + "a", "ae", "b", "be", "c", "cxz", "e", "ecxz", "g", "ge", "l", "le", + "na", "nae", "nb", "nbe", "nc", "ne", "ng", "nge", "nl", "nle", "no", + "np", "ns", "nz", "o", "p", "pe", "po", "s", "z" +}; +enum +{ + c_A, c_AE, c_B, c_BE, c_C, c_CXZ, c_E, c_ECXZ, c_G, c_GE, c_L, c_LE, + c_NA, c_NAE, c_NB, c_NBE, c_NC, c_NE, c_NG, c_NGE, c_NL, c_NLE, c_NO, + c_NP, c_NS, c_NZ, c_O, c_P, c_PE, c_PO, c_S, c_Z +}; +static int inverse_ccs[] = { + c_NA, c_NAE, c_NB, c_NBE, c_NC, -1, c_NE, -1, c_NG, c_NGE, c_NL, c_NLE, + c_A, c_AE, c_B, c_BE, c_C, c_E, c_G, c_GE, c_L, c_LE, c_O, c_P, c_S, + c_Z, c_NO, c_NP, c_PO, c_PE, c_NS, c_NZ +}; + +/* + * Directive names. 
+ */ +static const char *directives[] = { + "%arg", + "%assign", "%clear", "%define", "%elif", "%elifctx", "%elifdef", + "%elifid", "%elifidn", "%elifidni", "%elifmacro", "%elifnctx", "%elifndef", + "%elifnid", "%elifnidn", "%elifnidni", "%elifnmacro", "%elifnnum", "%elifnstr", + "%elifnum", "%elifstr", "%else", "%endif", "%endm", "%endmacro", + "%endrep", "%error", "%exitrep", "%iassign", "%idefine", "%if", + "%ifctx", "%ifdef", "%ifid", "%ifidn", "%ifidni", "%ifmacro", "%ifnctx", + "%ifndef", "%ifnid", "%ifnidn", "%ifnidni", "%ifnmacro", "%ifnnum", + "%ifnstr", "%ifnum", "%ifstr", "%imacro", "%include", + "%ixdefine", "%line", + "%local", + "%macro", "%pop", "%push", "%rep", "%repl", "%rotate", + "%stacksize", + "%strlen", "%substr", "%undef", "%xdefine" +}; +enum +{ + PP_ARG, + PP_ASSIGN, PP_CLEAR, PP_DEFINE, PP_ELIF, PP_ELIFCTX, PP_ELIFDEF, + PP_ELIFID, PP_ELIFIDN, PP_ELIFIDNI, PP_ELIFMACRO, PP_ELIFNCTX, PP_ELIFNDEF, + PP_ELIFNID, PP_ELIFNIDN, PP_ELIFNIDNI, PP_ELIFNMACRO, PP_ELIFNNUM, PP_ELIFNSTR, + PP_ELIFNUM, PP_ELIFSTR, PP_ELSE, PP_ENDIF, PP_ENDM, PP_ENDMACRO, + PP_ENDREP, PP_ERROR, PP_EXITREP, PP_IASSIGN, PP_IDEFINE, PP_IF, + PP_IFCTX, PP_IFDEF, PP_IFID, PP_IFIDN, PP_IFIDNI, PP_IFMACRO, PP_IFNCTX, + PP_IFNDEF, PP_IFNID, PP_IFNIDN, PP_IFNIDNI, PP_IFNMACRO, PP_IFNNUM, + PP_IFNSTR, PP_IFNUM, PP_IFSTR, PP_IMACRO, PP_INCLUDE, + PP_IXDEFINE, PP_LINE, + PP_LOCAL, + PP_MACRO, PP_POP, PP_PUSH, PP_REP, PP_REPL, PP_ROTATE, + PP_STACKSIZE, + PP_STRLEN, PP_SUBSTR, PP_UNDEF, PP_XDEFINE +}; + +/* If this is a an IF, ELIF, ELSE or ENDIF keyword */ +static int is_condition(int arg) +{ + return ((arg >= PP_ELIF) && (arg <= PP_ENDIF)) || + ((arg >= PP_IF) && (arg <= PP_IFSTR)); +} + +/* For TASM compatibility we need to be able to recognise TASM compatible + * conditional compilation directives. Using the NASM pre-processor does + * not work, so we look for them specifically from the following list and + * then jam in the equivalent NASM directive into the input stream. 
+ */ + +#ifndef MAX +# define MAX(a,b) ( ((a) > (b)) ? (a) : (b)) +#endif + +enum +{ + TM_ARG, TM_ELIF, TM_ELSE, TM_ENDIF, TM_IF, TM_IFDEF, TM_IFDIFI, + TM_IFNDEF, TM_INCLUDE, TM_LOCAL +}; + +static const char *tasm_directives[] = { + "arg", "elif", "else", "endif", "if", "ifdef", "ifdifi", + "ifndef", "include", "local" +}; + +static int StackSize = 4; +static char *StackPointer = "ebp"; +static int ArgOffset = 8; +static int LocalOffset = 4; + + +static Context *cstk; +static Include *istk; +static IncPath *ipath = NULL; + +static efunc _error; /* Pointer to client-provided error reporting function */ +static evalfunc evaluate; + +static int pass; /* HACK: pass 0 = generate dependencies only */ + +static unsigned long unique; /* unique identifier numbers */ + +static Line *predef = NULL; + +static ListGen *list; + +/* + * The number of hash values we use for the macro lookup tables. + * FIXME: We should *really* be able to configure this at run time, + * or even have the hash table automatically expanding when necessary. + */ +#define NHASH 31 + +/* + * The current set of multi-line macros we have defined. + */ +static MMacro *mmacros[NHASH]; + +/* + * The current set of single-line macros we have defined. + */ +static SMacro *smacros[NHASH]; + +/* + * The multi-line macro we are currently defining, or the %rep + * block we are currently reading, if any. + */ +static MMacro *defining; + +/* + * The number of macro parameters to allocate space for at a time. + */ +#define PARAM_DELTA 16 + +/* + * The standard macro set: defined as `static char *stdmac[]'. Also + * gives our position in the macro set, when we're processing it. + */ +#include "macros.c" +static const char **stdmacpos; + +/* + * The extra standard macros that come from the object format, if + * any. 
+ */ +static const char **extrastdmac = NULL; +int any_extrastdmac; + +/* + * Tokens are allocated in blocks to improve speed + */ +#define TOKEN_BLOCKSIZE 4096 +static Token *freeTokens = NULL; +struct Blocks { + Blocks *next; + void *chunk; +}; + +static Blocks blocks = { NULL, NULL }; + +/* + * Forward declarations. + */ +static Token *expand_mmac_params(Token * tline); +static Token *expand_smacro(Token * tline); +static Token *expand_id(Token * tline); +static Context *get_ctx(char *name, int all_contexts); +static void make_tok_num(Token * tok, long val); +static void error(int severity, const char *fmt, ...); +static void *new_Block(size_t size); +static void delete_Blocks(void); +static Token *new_Token(Token * next, int type, char *text, int txtlen); +static Token *delete_Token(Token * t); + +/* + * Macros for safe checking of token pointers, avoid *(NULL) + */ +#define tok_type_(x,t) ((x) && (x)->type == (t)) +#define skip_white_(x) if (tok_type_((x), TOK_WHITESPACE)) (x)=(x)->next +#define tok_is_(x,v) (tok_type_((x), TOK_OTHER) && !strcmp((x)->text,(v))) +#define tok_isnt_(x,v) ((x) && ((x)->type!=TOK_OTHER || strcmp((x)->text,(v)))) + +/* Handle TASM specific directives, which do not contain a % in + * front of them. We do it here because I could not find any other + * place to do it for the moment, and it is a hack (ideally it would + * be nice to be able to use the NASM pre-processor to do it). 
+ */ +static char * +check_tasm_directive(char *line) +{ + int i, j, k, m, len; + char *p = line, *oldline, oldchar; + + /* Skip whitespace */ + while (isspace(*p) && *p != 0) + p++; + + /* Binary search for the directive name */ + i = -1; + j = elements(tasm_directives); + len = 0; + while (!isspace(p[len]) && p[len] != 0) + len++; + if (len) + { + oldchar = p[len]; + p[len] = 0; + while (j - i > 1) + { + k = (j + i) / 2; + m = nasm_stricmp(p, tasm_directives[k]); + if (m == 0) + { + /* We have found a directive, so jam a % in front of it + * so that NASM will then recognise it as one if it's own. + */ + p[len] = oldchar; + len = strlen(p); + oldline = line; + line = nasm_malloc(len + 2); + line[0] = '%'; + if (k == TM_IFDIFI) + { + /* NASM does not recognise IFDIFI, so we convert it to + * %ifdef BOGUS. This is not used in NASM comaptible + * code, but does need to parse for the TASM macro + * package. + */ + strcpy(line + 1, "ifdef BOGUS"); + } + else + { + memcpy(line + 1, p, len + 1); + } + nasm_free(oldline); + return line; + } + else if (m < 0) + { + j = k; + } + else + i = k; + } + p[len] = oldchar; + } + return line; +} + +/* + * The pre-preprocessing stage... This function translates line + * number indications as they emerge from GNU cpp (`# lineno "file" + * flags') into NASM preprocessor line number indications (`%line + * lineno file'). + */ +static char * +prepreproc(char *line) +{ + int lineno, fnlen; + char *fname, *oldline; + + if (line[0] == '#' && line[1] == ' ') + { + oldline = line; + fname = oldline + 2; + lineno = atoi(fname); + fname += strspn(fname, "0123456789 "); + if (*fname == '"') + fname++; + fnlen = strcspn(fname, "\""); + line = nasm_malloc(20 + fnlen); + sprintf(line, "%%line %d %.*s", lineno, fnlen, fname); + nasm_free(oldline); + } + if (tasm_compatible_mode) + return check_tasm_directive(line); + return line; +} + +/* + * The hash function for macro lookups. 
Note that due to some + * macros having case-insensitive names, the hash function must be + * invariant under case changes. We implement this by applying a + * perfectly normal hash function to the uppercase of the string. + */ +static int +hash(char *s) +{ + unsigned int h = 0; + int i = 0; + /* + * Powers of three, mod 31. + */ + static const int multipliers[] = { + 1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, + 30, 28, 22, 4, 12, 5, 15, 14, 11, 2, 6, 18, 23, 7, 21 + }; + + + while (*s) + { + h += multipliers[i] * (unsigned char) (toupper(*s)); + s++; + if (++i >= elements(multipliers)) + i = 0; + } + h %= NHASH; + return h; +} + +/* + * Free a linked list of tokens. + */ +static void +free_tlist(Token * list) +{ + while (list) + { + list = delete_Token(list); + } +} + +/* + * Free a linked list of lines. + */ +static void +free_llist(Line * list) +{ + Line *l; + while (list) + { + l = list; + list = list->next; + free_tlist(l->first); + nasm_free(l); + } +} + +/* + * Free an MMacro + */ +static void +free_mmacro(MMacro * m) +{ + nasm_free(m->name); + free_tlist(m->dlist); + nasm_free(m->defaults); + free_llist(m->expansion); + nasm_free(m); +} + +/* + * Pop the context stack. + */ +static void +ctx_pop(void) +{ + Context *c = cstk; + SMacro *smac, *s; + + cstk = cstk->next; + smac = c->localmac; + while (smac) + { + s = smac; + smac = smac->next; + nasm_free(s->name); + free_tlist(s->expansion); + nasm_free(s); + } + nasm_free(c->name); + nasm_free(c); +} + +#define BUF_DELTA 512 +/* + * Read a line from the top file in istk, handling multiple CR/LFs + * at the end of the line read, and handling spurious ^Zs. Will + * return lines from the standard macro set if this has not already + * been done. 
+ */ +static char * +read_line(void) +{ + char *buffer, *p, *q; + int bufsize, continued_count; + + if (stdmacpos) + { + if (*stdmacpos) + { + char *ret = nasm_strdup(*stdmacpos++); + if (!*stdmacpos && any_extrastdmac) + { + stdmacpos = extrastdmac; + any_extrastdmac = FALSE; + return ret; + } + /* + * Nasty hack: here we push the contents of `predef' on + * to the top-level expansion stack, since this is the + * most convenient way to implement the pre-include and + * pre-define features. + */ + if (!*stdmacpos) + { + Line *pd, *l; + Token *head, **tail, *t; + + for (pd = predef; pd; pd = pd->next) + { + head = NULL; + tail = &head; + for (t = pd->first; t; t = t->next) + { + *tail = new_Token(NULL, t->type, t->text, 0); + tail = &(*tail)->next; + } + l = nasm_malloc(sizeof(Line)); + l->next = istk->expansion; + l->first = head; + l->finishes = FALSE; + istk->expansion = l; + } + } + return ret; + } + else + { + stdmacpos = NULL; + } + } + + bufsize = BUF_DELTA; + buffer = nasm_malloc(BUF_DELTA); + p = buffer; + continued_count = 0; + while (1) + { + q = fgets(p, bufsize - (p - buffer), istk->fp); + if (!q) + break; + p += strlen(p); + if (p > buffer && p[-1] == '\n') + { + /* Convert backslash-CRLF line continuation sequences into + nothing at all (for DOS and Windows) */ + if (((p - 2) > buffer) && (p[-3] == '\\') && (p[-2] == '\r')) { + p -= 3; + *p = 0; + continued_count++; + } + /* Also convert backslash-LF line continuation sequences into + nothing at all (for Unix) */ + else if (((p - 1) > buffer) && (p[-2] == '\\')) { + p -= 2; + *p = 0; + continued_count++; + } + else { + break; + } + } + if (p - buffer > bufsize - 10) + { + long offset = p - buffer; + bufsize += BUF_DELTA; + buffer = nasm_realloc(buffer, bufsize); + p = buffer + offset; /* prevent stale-pointer problems */ + } + } + + if (!q && p == buffer) + { + nasm_free(buffer); + return NULL; + } + + src_set_linnum(src_get_linnum() + istk->lineinc + (continued_count * istk->lineinc)); + + /* + * 
Play safe: remove CRs as well as LFs, if any of either are + * present at the end of the line. + */ + while (--p >= buffer && (*p == '\n' || *p == '\r')) + *p = '\0'; + + /* + * Handle spurious ^Z, which may be inserted into source files + * by some file transfer utilities. + */ + buffer[strcspn(buffer, "\032")] = '\0'; + + list->line(LIST_READ, buffer); + + return buffer; +} + +/* + * Tokenise a line of text. This is a very simple process since we + * don't need to parse the value out of e.g. numeric tokens: we + * simply split one string into many. + */ +static Token * +tokenise(char *line) +{ + char *p = line; + int type; + Token *list = NULL; + Token *t, **tail = &list; + + while (*line) + { + p = line; + if (*p == '%') + { + p++; + if ( isdigit(*p) || + ((*p == '-' || *p == '+') && isdigit(p[1])) || + ((*p == '+') && (isspace(p[1]) || !p[1]))) + { + do + { + p++; + } + while (isdigit(*p)); + type = TOK_PREPROC_ID; + } + else if (*p == '{') + { + p++; + while (*p && *p != '}') + { + p[-1] = *p; + p++; + } + p[-1] = '\0'; + if (*p) + p++; + type = TOK_PREPROC_ID; + } + else if (isidchar(*p) || + ((*p == '!' || *p == '%' || *p == '$') && + isidchar(p[1]))) + { + do + { + p++; + } + while (isidchar(*p)); + type = TOK_PREPROC_ID; + } + else + { + type = TOK_OTHER; + if (*p == '%') + p++; + } + } + else if (isidstart(*p) || (*p == '$' && isidstart(p[1]))) + { + type = TOK_ID; + p++; + while (*p && isidchar(*p)) + p++; + } + else if (*p == '\'' || *p == '"') + { + /* + * A string token. + */ + char c = *p; + p++; + type = TOK_STRING; + while (*p && *p != c) + p++; + if (*p) + { + p++; + } + else + { + error(ERR_WARNING, "unterminated string"); + } + } + else if (isnumstart(*p)) + { + /* + * A number token. 
+ */ + type = TOK_NUMBER; + p++; + while (*p && isnumchar(*p)) + p++; + } + else if (isspace(*p)) + { + type = TOK_WHITESPACE; + p++; + while (*p && isspace(*p)) + p++; + /* + * Whitespace just before end-of-line is discarded by + * pretending it's a comment; whitespace just before a + * comment gets lumped into the comment. + */ + if (!*p || *p == ';') + { + type = TOK_COMMENT; + while (*p) + p++; + } + } + else if (*p == ';') + { + type = TOK_COMMENT; + while (*p) + p++; + } + else + { + /* + * Anything else is an operator of some kind. We check + * for all the double-character operators (>>, <<, //, + * %%, <=, >=, ==, !=, <>, &&, ||, ^^), but anything + * else is a single-character operator. + */ + type = TOK_OTHER; + if ((p[0] == '>' && p[1] == '>') || + (p[0] == '<' && p[1] == '<') || + (p[0] == '/' && p[1] == '/') || + (p[0] == '<' && p[1] == '=') || + (p[0] == '>' && p[1] == '=') || + (p[0] == '=' && p[1] == '=') || + (p[0] == '!' && p[1] == '=') || + (p[0] == '<' && p[1] == '>') || + (p[0] == '&' && p[1] == '&') || + (p[0] == '|' && p[1] == '|') || + (p[0] == '^' && p[1] == '^')) + { + p++; + } + p++; + } + if (type != TOK_COMMENT) + { + *tail = t = new_Token(NULL, type, line, p - line); + tail = &t->next; + } + line = p; + } + return list; +} + +/* + * this function allocates a new managed block of memory and + * returns a pointer to the block. The managed blocks are + * deleted only all at once by the delete_Blocks function. 
+ */ +static void * +new_Block(size_t size) +{ + Blocks *b = &blocks; + + /* first, get to the end of the linked list */ + while (b->next) + b = b->next; + /* now allocate the requested chunk */ + b->chunk = nasm_malloc(size); + + /* now allocate a new block for the next request */ + b->next = nasm_malloc(sizeof(Blocks)); + /* and initialize the contents of the new block */ + b->next->next = NULL; + b->next->chunk = NULL; + return b->chunk; +} + +/* + * this function deletes all managed blocks of memory + */ +static void +delete_Blocks(void) +{ + Blocks *a,*b = &blocks; + + /* + * keep in mind that the first block, pointed to by blocks + * is a static and not dynamically allocated, so we don't + * free it. + */ + while (b) + { + if (b->chunk) + nasm_free(b->chunk); + a = b; + b = b->next; + if (a != &blocks) + nasm_free(a); + } +} + +/* + * this function creates a new Token and passes a pointer to it + * back to the caller. It sets the type and text elements, and + * also the mac and next elements to NULL. + */ +static Token * +new_Token(Token * next, int type, char *text, int txtlen) +{ + Token *t; + int i; + + if (freeTokens == NULL) + { + freeTokens = (Token *)new_Block(TOKEN_BLOCKSIZE * sizeof(Token)); + for (i = 0; i < TOKEN_BLOCKSIZE - 1; i++) + freeTokens[i].next = &freeTokens[i + 1]; + freeTokens[i].next = NULL; + } + t = freeTokens; + freeTokens = t->next; + t->next = next; + t->mac = NULL; + t->type = type; + if (type == TOK_WHITESPACE || text == NULL) + { + t->text = NULL; + } + else + { + if (txtlen == 0) + txtlen = strlen(text); + t->text = nasm_malloc(1 + txtlen); + strncpy(t->text, text, txtlen); + t->text[txtlen] = '\0'; + } + return t; +} + +static Token * +delete_Token(Token * t) +{ + Token *next = t->next; + nasm_free(t->text); + t->next = freeTokens; + freeTokens = t; + return next; +} + +/* + * Convert a line of tokens back into text. 
+ * If expand_locals is not zero, identifiers of the form "%$*xxx" + * will be transformed into ..@ctxnum.xxx + */ +static char * +detoken(Token * tlist, int expand_locals) +{ + Token *t; + int len; + char *line, *p; + + len = 0; + for (t = tlist; t; t = t->next) + { + if (t->type == TOK_PREPROC_ID && t->text[1] == '!') + { + char *p = getenv(t->text + 2); + nasm_free(t->text); + if (p) + t->text = nasm_strdup(p); + else + t->text = NULL; + } + /* Expand local macros here and not during preprocessing */ + if (expand_locals && + t->type == TOK_PREPROC_ID && t->text && + t->text[0] == '%' && t->text[1] == '$') + { + Context *ctx = get_ctx(t->text, FALSE); + if (ctx) + { + char buffer[40]; + char *p, *q = t->text + 2; + + q += strspn(q, "$"); + sprintf(buffer, "..@%lu.", ctx->number); + p = nasm_strcat(buffer, q); + nasm_free(t->text); + t->text = p; + } + } + if (t->type == TOK_WHITESPACE) + { + len++; + } + else if (t->text) + { + len += strlen(t->text); + } + } + p = line = nasm_malloc(len + 1); + for (t = tlist; t; t = t->next) + { + if (t->type == TOK_WHITESPACE) + { + *p = ' '; + p++; + *p = '\0'; + } + else if (t->text) + { + strcpy(p, t->text); + p += strlen(p); + } + } + *p = '\0'; + return line; +} + +/* + * A scanner, suitable for use by the expression evaluator, which + * operates on a line of Tokens. Expects a pointer to a pointer to + * the first token in the line to be passed in as its private_data + * field. + */ +static int +ppscan(void *private_data, struct tokenval *tokval) +{ + Token **tlineptr = private_data; + Token *tline; + + do + { + tline = *tlineptr; + *tlineptr = tline ? 
tline->next : NULL; + } + while (tline && (tline->type == TOK_WHITESPACE || + tline->type == TOK_COMMENT)); + + if (!tline) + return tokval->t_type = TOKEN_EOS; + + if (tline->text[0] == '$' && !tline->text[1]) + return tokval->t_type = TOKEN_HERE; + if (tline->text[0] == '$' && tline->text[1] == '$' && !tline->text[2]) + return tokval->t_type = TOKEN_BASE; + + if (tline->type == TOK_ID) + { + tokval->t_charptr = tline->text; + if (tline->text[0] == '$') + { + tokval->t_charptr++; + return tokval->t_type = TOKEN_ID; + } + + /* + * This is the only special case we actually need to worry + * about in this restricted context. + */ + if (!nasm_stricmp(tline->text, "seg")) + return tokval->t_type = TOKEN_SEG; + + return tokval->t_type = TOKEN_ID; + } + + if (tline->type == TOK_NUMBER) + { + int rn_error; + + tokval->t_integer = readnum(tline->text, &rn_error); + if (rn_error) + return tokval->t_type = TOKEN_ERRNUM; + tokval->t_charptr = NULL; + return tokval->t_type = TOKEN_NUM; + } + + if (tline->type == TOK_STRING) + { + int rn_warn; + char q, *r; + int l; + + r = tline->text; + q = *r++; + l = strlen(r); + + if (l == 0 || r[l - 1] != q) + return tokval->t_type = TOKEN_ERRNUM; + tokval->t_integer = readstrnum(r, l - 1, &rn_warn); + if (rn_warn) + error(ERR_WARNING | ERR_PASS1, "character constant too long"); + tokval->t_charptr = NULL; + return tokval->t_type = TOKEN_NUM; + } + + if (tline->type == TOK_OTHER) + { + if (!strcmp(tline->text, "<<")) + return tokval->t_type = TOKEN_SHL; + if (!strcmp(tline->text, ">>")) + return tokval->t_type = TOKEN_SHR; + if (!strcmp(tline->text, "//")) + return tokval->t_type = TOKEN_SDIV; + if (!strcmp(tline->text, "%%")) + return tokval->t_type = TOKEN_SMOD; + if (!strcmp(tline->text, "==")) + return tokval->t_type = TOKEN_EQ; + if (!strcmp(tline->text, "<>")) + return tokval->t_type = TOKEN_NE; + if (!strcmp(tline->text, "!=")) + return tokval->t_type = TOKEN_NE; + if (!strcmp(tline->text, "<=")) + return tokval->t_type = TOKEN_LE; 
+ if (!strcmp(tline->text, ">=")) + return tokval->t_type = TOKEN_GE; + if (!strcmp(tline->text, "&&")) + return tokval->t_type = TOKEN_DBL_AND; + if (!strcmp(tline->text, "^^")) + return tokval->t_type = TOKEN_DBL_XOR; + if (!strcmp(tline->text, "||")) + return tokval->t_type = TOKEN_DBL_OR; + } + + /* + * We have no other options: just return the first character of + * the token text. + */ + return tokval->t_type = tline->text[0]; +} + +/* + * Compare a string to the name of an existing macro; this is a + * simple wrapper which calls either strcmp or nasm_stricmp + * depending on the value of the `casesense' parameter. + */ +static int +mstrcmp(char *p, char *q, int casesense) +{ + return casesense ? strcmp(p, q) : nasm_stricmp(p, q); +} + +/* + * Return the Context structure associated with a %$ token. Return + * NULL, having _already_ reported an error condition, if the + * context stack isn't deep enough for the supplied number of $ + * signs. + * If all_contexts == TRUE, contexts that enclose current are + * also scanned for such smacro, until it is found; if not - + * only the context that directly results from the number of $'s + * in variable's name. + */ +static Context * +get_ctx(char *name, int all_contexts) +{ + Context *ctx; + SMacro *m; + int i; + + if (!name || name[0] != '%' || name[1] != '$') + return NULL; + + if (!cstk) + { + error(ERR_NONFATAL, "`%s': context stack is empty", name); + return NULL; + } + + for (i = strspn(name + 2, "$"), ctx = cstk; (i > 0) && ctx; i--) + { + ctx = ctx->next; +/* i--; Lino - 02/25/02 */ + } + if (!ctx) + { + error(ERR_NONFATAL, "`%s': context stack is only" + " %d level%s deep", name, i - 1, (i == 2 ? 
"" : "s")); + return NULL; + } + if (!all_contexts) + return ctx; + + do + { + /* Search for this smacro in found context */ + m = ctx->localmac; + while (m) + { + if (!mstrcmp(m->name, name, m->casesense)) + return ctx; + m = m->next; + } + ctx = ctx->next; + } + while (ctx); + return NULL; +} + +/* Add a slash to the end of a path if it is missing. We use the + * forward slash to make it compatible with Unix systems. + */ +static void +backslash(char *s) +{ + int pos = strlen(s); + if (s[pos - 1] != '\\' && s[pos - 1] != '/') + { + s[pos] = '/'; + s[pos + 1] = '\0'; + } +} + +/* + * Open an include file. This routine must always return a valid + * file pointer if it returns - it's responsible for throwing an + * ERR_FATAL and bombing out completely if not. It should also try + * the include path one by one until it finds the file or reaches + * the end of the path. + */ +static FILE * +inc_fopen(char *file) +{ + FILE *fp; + char *prefix = "", *combine; + IncPath *ip = ipath; + static int namelen = 0; + int len = strlen(file); + + while (1) + { + combine = nasm_malloc(strlen(prefix) + 1 + len + 1); + strcpy(combine, prefix); + if (prefix[0] != 0) + backslash(combine); + strcat(combine, file); + fp = fopen(combine, "r"); + if (pass == 0 && fp) + { + namelen += strlen(combine) + 1; + if (namelen > 62) + { + printf(" \\\n "); + namelen = 2; + } + printf(" %s", combine); + } + nasm_free(combine); + if (fp) + return fp; + if (!ip) + break; + prefix = ip->path; + ip = ip->next; + } + + error(ERR_FATAL, "unable to open include file `%s'", file); + return NULL; /* never reached - placate compilers */ +} + +/* + * Determine if we should warn on defining a single-line macro of + * name `name', with `nparam' parameters. If nparam is 0 or -1, will + * return TRUE if _any_ single-line macro of that name is defined. + * Otherwise, will return TRUE if a single-line macro with either + * `nparam' or no parameters is defined. 
+ * + * If a macro with precisely the right number of parameters is + * defined, or nparam is -1, the address of the definition structure + * will be returned in `defn'; otherwise NULL will be returned. If `defn' + * is NULL, no action will be taken regarding its contents, and no + * error will occur. + * + * Note that this is also called with nparam zero to resolve + * `ifdef'. + * + * If you already know which context macro belongs to, you can pass + * the context pointer as first parameter; if you won't but name begins + * with %$ the context will be automatically computed. If all_contexts + * is true, macro will be searched in outer contexts as well. + */ +static int +smacro_defined(Context * ctx, char *name, int nparam, SMacro ** defn, + int nocase) +{ + SMacro *m; + + if (ctx) + m = ctx->localmac; + else if (name[0] == '%' && name[1] == '$') + { + if (cstk) + ctx = get_ctx(name, FALSE); + if (!ctx) + return FALSE; /* got to return _something_ */ + m = ctx->localmac; + } + else + m = smacros[hash(name)]; + + while (m) + { + if (!mstrcmp(m->name, name, m->casesense && nocase) && + (nparam <= 0 || m->nparam == 0 || nparam == m->nparam)) + { + if (defn) + { + if (nparam == m->nparam || nparam == -1) + *defn = m; + else + *defn = NULL; + } + return TRUE; + } + m = m->next; + } + + return FALSE; +} + +/* + * Count and mark off the parameters in a multi-line macro call. + * This is called both from within the multi-line macro expansion + * code, and also to mark off the default parameters when provided + * in a %macro definition line. + */ +static void +count_mmac_params(Token * t, int *nparam, Token *** params) +{ + int paramsize, brace; + + *nparam = paramsize = 0; + *params = NULL; + while (t) + { + if (*nparam >= paramsize) + { + paramsize += PARAM_DELTA; + *params = nasm_realloc(*params, sizeof(**params) * paramsize); + } + skip_white_(t); + brace = FALSE; + if (tok_is_(t, "{")) + brace = TRUE; + (*params)[(*nparam)++] = t; + while (tok_isnt_(t, brace ? 
"}" : ",")) + t = t->next; + if (t) + { /* got a comma/brace */ + t = t->next; + if (brace) + { + /* + * Now we've found the closing brace, look further + * for the comma. + */ + skip_white_(t); + if (tok_isnt_(t, ",")) + { + error(ERR_NONFATAL, + "braces do not enclose all of macro parameter"); + while (tok_isnt_(t, ",")) + t = t->next; + } + if (t) + t = t->next; /* eat the comma */ + } + } + } +} + +/* + * Determine whether one of the various `if' conditions is true or + * not. + * + * We must free the tline we get passed. + */ +static int +if_condition(Token * tline, int i) +{ + int j, casesense; + Token *t, *tt, **tptr, *origline; + struct tokenval tokval; + expr *evalresult; + + origline = tline; + + switch (i) + { + case PP_IFCTX: + case PP_ELIFCTX: + case PP_IFNCTX: + case PP_ELIFNCTX: + j = FALSE; /* have we matched yet? */ + while (cstk && tline) + { + skip_white_(tline); + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, + "`%s' expects context identifiers", + directives[i]); + free_tlist(origline); + return -1; + } + if (!nasm_stricmp(tline->text, cstk->name)) + j = TRUE; + tline = tline->next; + } + if (i == PP_IFNCTX || i == PP_ELIFNCTX) + j = !j; + free_tlist(origline); + return j; + + case PP_IFDEF: + case PP_ELIFDEF: + case PP_IFNDEF: + case PP_ELIFNDEF: + j = FALSE; /* have we matched yet? 
*/ + while (tline) + { + skip_white_(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, + "`%s' expects macro identifiers", + directives[i]); + free_tlist(origline); + return -1; + } + if (smacro_defined(NULL, tline->text, 0, NULL, 1)) + j = TRUE; + tline = tline->next; + } + if (i == PP_IFNDEF || i == PP_ELIFNDEF) + j = !j; + free_tlist(origline); + return j; + + case PP_IFIDN: + case PP_ELIFIDN: + case PP_IFNIDN: + case PP_ELIFNIDN: + case PP_IFIDNI: + case PP_ELIFIDNI: + case PP_IFNIDNI: + case PP_ELIFNIDNI: + tline = expand_smacro(tline); + t = tt = tline; + while (tok_isnt_(tt, ",")) + tt = tt->next; + if (!tt) + { + error(ERR_NONFATAL, + "`%s' expects two comma-separated arguments", + directives[i]); + free_tlist(tline); + return -1; + } + tt = tt->next; + casesense = (i == PP_IFIDN || i == PP_ELIFIDN || + i == PP_IFNIDN || i == PP_ELIFNIDN); + j = TRUE; /* assume equality unless proved not */ + while ((t->type != TOK_OTHER || strcmp(t->text, ",")) && tt) + { + if (tt->type == TOK_OTHER && !strcmp(tt->text, ",")) + { + error(ERR_NONFATAL, "`%s': more than one comma on line", + directives[i]); + free_tlist(tline); + return -1; + } + if (t->type == TOK_WHITESPACE) + { + t = t->next; + continue; + } + else if (tt->type == TOK_WHITESPACE) + { + tt = tt->next; + continue; + } + else if (tt->type != t->type || + mstrcmp(tt->text, t->text, casesense)) + { + j = FALSE; /* found mismatching tokens */ + break; + } + else + { + t = t->next; + tt = tt->next; + continue; + } + } + if ((t->type != TOK_OTHER || strcmp(t->text, ",")) || tt) + j = FALSE; /* trailing gunk on one end or other */ + if (i == PP_IFNIDN || i == PP_ELIFNIDN || + i == PP_IFNIDNI || i == PP_ELIFNIDNI) + j = !j; + free_tlist(tline); + return j; + + case PP_IFMACRO: + case PP_ELIFMACRO: + case PP_IFNMACRO: + case PP_ELIFNMACRO: + { + int found = 0; + MMacro searching, *mmac; + + tline = tline->next; + 
skip_white_(tline); + tline = expand_id(tline); + if (!tok_type_(tline, TOK_ID)) + { + error(ERR_NONFATAL, + "`%s' expects a macro name", + directives[i]); + return -1; + } + searching.name = nasm_strdup(tline->text); + searching.casesense = (i == PP_MACRO); + searching.plus = FALSE; + searching.nolist = FALSE; + searching.in_progress = FALSE; + searching.rep_nest = NULL; + searching.nparam_min = 0; + searching.nparam_max = INT_MAX; + tline = expand_smacro(tline->next); + skip_white_(tline); + if (!tline) + { + } else if (!tok_type_(tline, TOK_NUMBER)) + { + error(ERR_NONFATAL, + "`%s' expects a parameter count or nothing", + directives[i]); + } + else + { + searching.nparam_min = searching.nparam_max = + readnum(tline->text, &j); + if (j) + error(ERR_NONFATAL, + "unable to parse parameter count `%s'", + tline->text); + } + if (tline && tok_is_(tline->next, "-")) + { + tline = tline->next->next; + if (tok_is_(tline, "*")) + searching.nparam_max = INT_MAX; + else if (!tok_type_(tline, TOK_NUMBER)) + error(ERR_NONFATAL, + "`%s' expects a parameter count after `-'", + directives[i]); + else + { + searching.nparam_max = readnum(tline->text, &j); + if (j) + error(ERR_NONFATAL, + "unable to parse parameter count `%s'", + tline->text); + if (searching.nparam_min > searching.nparam_max) + error(ERR_NONFATAL, + "minimum parameter count exceeds maximum"); + } + } + if (tline && tok_is_(tline->next, "+")) + { + tline = tline->next; + searching.plus = TRUE; + } + mmac = mmacros[hash(searching.name)]; + while (mmac) + { + if (!strcmp(mmac->name, searching.name) && + (mmac->nparam_min <= searching.nparam_max + || searching.plus) + && (searching.nparam_min <= mmac->nparam_max + || mmac->plus)) + { + found = TRUE; + break; + } + mmac = mmac->next; + } + nasm_free(searching.name); + free_tlist(origline); + if (i == PP_IFNMACRO || i == PP_ELIFNMACRO) + found = !found; + return found; + } + + case PP_IFID: + case PP_ELIFID: + case PP_IFNID: + case PP_ELIFNID: + case PP_IFNUM: + case 
PP_ELIFNUM: + case PP_IFNNUM: + case PP_ELIFNNUM: + case PP_IFSTR: + case PP_ELIFSTR: + case PP_IFNSTR: + case PP_ELIFNSTR: + tline = expand_smacro(tline); + t = tline; + while (tok_type_(t, TOK_WHITESPACE)) + t = t->next; + j = FALSE; /* placate optimiser */ + if (t) + switch (i) + { + case PP_IFID: + case PP_ELIFID: + case PP_IFNID: + case PP_ELIFNID: + j = (t->type == TOK_ID); + break; + case PP_IFNUM: + case PP_ELIFNUM: + case PP_IFNNUM: + case PP_ELIFNNUM: + j = (t->type == TOK_NUMBER); + break; + case PP_IFSTR: + case PP_ELIFSTR: + case PP_IFNSTR: + case PP_ELIFNSTR: + j = (t->type == TOK_STRING); + break; + } + if (i == PP_IFNID || i == PP_ELIFNID || + i == PP_IFNNUM || i == PP_ELIFNNUM || + i == PP_IFNSTR || i == PP_ELIFNSTR) + j = !j; + free_tlist(tline); + return j; + + case PP_IF: + case PP_ELIF: + t = tline = expand_smacro(tline); + tptr = &t; + tokval.t_type = TOKEN_INVALID; + evalresult = evaluate(ppscan, tptr, &tokval, + NULL, pass | CRITICAL, error, NULL); + free_tlist(tline); + if (!evalresult) + return -1; + if (tokval.t_type) + error(ERR_WARNING, + "trailing garbage after expression ignored"); + if (!is_simple(evalresult)) + { + error(ERR_NONFATAL, + "non-constant value given to `%s'", directives[i]); + return -1; + } + return reloc_value(evalresult) != 0; + + default: + error(ERR_FATAL, + "preprocessor directive `%s' not yet implemented", + directives[i]); + free_tlist(origline); + return -1; /* yeah, right */ + } +} + +/* + * Expand macros in a string. Used in %error and %include directives. + * First tokenise the string, apply "expand_smacro" and then de-tokenise back. + * The returned variable should ALWAYS be freed after usage. + */ +void +expand_macros_in_string(char **p) +{ + Token *line = tokenise(*p); + line = expand_smacro(line); + *p = detoken(line, FALSE); +} + +/** + * find and process preprocessor directive in passed line + * Find out if a line contains a preprocessor directive, and deal + * with it if so. 
+ * + * If a directive _is_ found, it is the responsibility of this routine + * (and not the caller) to free_tlist() the line. + * + * @param tline a pointer to the current tokeninzed line linked list + * @return DIRECTIVE_FOUND or NO_DIRECTIVE_FOUND + * + */ +static int +do_directive(Token * tline) +{ + int i, j, k, m, nparam, nolist; + int offset; + char *p, *mname; + Include *inc; + Context *ctx; + Cond *cond; + SMacro *smac, **smhead; + MMacro *mmac; + Token *t, *tt, *param_start, *macro_start, *last, **tptr, *origline; + Line *l; + struct tokenval tokval; + expr *evalresult; + MMacro *tmp_defining; /* Used when manipulating rep_nest */ + + origline = tline; + + skip_white_(tline); + if (!tok_type_(tline, TOK_PREPROC_ID) || + (tline->text[1] == '%' || tline->text[1] == '$' + || tline->text[1] == '!')) + return NO_DIRECTIVE_FOUND; + + i = -1; + j = elements(directives); + while (j - i > 1) + { + k = (j + i) / 2; + m = nasm_stricmp(tline->text, directives[k]); + if (m == 0) { + if (tasm_compatible_mode) { + i = k; + j = -2; + } else if (k != PP_ARG && k != PP_LOCAL && k != PP_STACKSIZE) { + i = k; + j = -2; + } + break; + } + else if (m < 0) { + j = k; + } + else + i = k; + } + + /* + * If we're in a non-emitting branch of a condition construct, + * or walking to the end of an already terminated %rep block, + * we should ignore all directives except for condition + * directives. + */ + if (((istk->conds && !emitting(istk->conds->state)) || + (istk->mstk && !istk->mstk->in_progress)) && + !is_condition(i)) + { + return NO_DIRECTIVE_FOUND; + } + + /* + * If we're defining a macro or reading a %rep block, we should + * ignore all directives except for %macro/%imacro (which + * generate an error), %endm/%endmacro, and (only if we're in a + * %rep block) %endrep. If we're in a %rep block, another %rep + * causes an error, so should be let through. 
+ */ + if (defining && i != PP_MACRO && i != PP_IMACRO && + i != PP_ENDMACRO && i != PP_ENDM && + (defining->name || (i != PP_ENDREP && i != PP_REP))) + { + return NO_DIRECTIVE_FOUND; + } + + if (j != -2) + { + error(ERR_NONFATAL, "unknown preprocessor directive `%s'", + tline->text); + return NO_DIRECTIVE_FOUND; /* didn't get it */ + } + + switch (i) + { + case PP_STACKSIZE: + /* Directive to tell NASM what the default stack size is. The + * default is for a 16-bit stack, and this can be overriden with + * %stacksize large. + * the following form: + * + * ARG arg1:WORD, arg2:DWORD, arg4:QWORD + */ + tline = tline->next; + if (tline && tline->type == TOK_WHITESPACE) + tline = tline->next; + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, "`%%stacksize' missing size parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + if (nasm_stricmp(tline->text, "flat") == 0) + { + /* All subsequent ARG directives are for a 32-bit stack */ + StackSize = 4; + StackPointer = "ebp"; + ArgOffset = 8; + LocalOffset = 4; + } + else if (nasm_stricmp(tline->text, "large") == 0) + { + /* All subsequent ARG directives are for a 16-bit stack, + * far function call. + */ + StackSize = 2; + StackPointer = "bp"; + ArgOffset = 4; + LocalOffset = 2; + } + else if (nasm_stricmp(tline->text, "small") == 0) + { + /* All subsequent ARG directives are for a 16-bit stack, + * far function call. We don't support near functions. 
+ */ + StackSize = 2; + StackPointer = "bp"; + ArgOffset = 6; + LocalOffset = 2; + } + else + { + error(ERR_NONFATAL, "`%%stacksize' invalid size type"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_ARG: + /* TASM like ARG directive to define arguments to functions, in + * the following form: + * + * ARG arg1:WORD, arg2:DWORD, arg4:QWORD + */ + offset = ArgOffset; + do + { + char *arg, directive[256]; + int size = StackSize; + + /* Find the argument name */ + tline = tline->next; + if (tline && tline->type == TOK_WHITESPACE) + tline = tline->next; + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, "`%%arg' missing argument parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + arg = tline->text; + + /* Find the argument size type */ + tline = tline->next; + if (!tline || tline->type != TOK_OTHER + || tline->text[0] != ':') + { + error(ERR_NONFATAL, + "Syntax error processing `%%arg' directive"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + tline = tline->next; + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, + "`%%arg' missing size type parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + /* Allow macro expansion of type parameter */ + tt = tokenise(tline->text); + tt = expand_smacro(tt); + if (nasm_stricmp(tt->text, "byte") == 0) + { + size = MAX(StackSize, 1); + } + else if (nasm_stricmp(tt->text, "word") == 0) + { + size = MAX(StackSize, 2); + } + else if (nasm_stricmp(tt->text, "dword") == 0) + { + size = MAX(StackSize, 4); + } + else if (nasm_stricmp(tt->text, "qword") == 0) + { + size = MAX(StackSize, 8); + } + else if (nasm_stricmp(tt->text, "tword") == 0) + { + size = MAX(StackSize, 10); + } + else + { + error(ERR_NONFATAL, + "Invalid size type for `%%arg' missing directive"); + free_tlist(tt); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + free_tlist(tt); + + /* Now define the macro for the argument */ + 
sprintf(directive, "%%define %s (%s+%d)", arg, StackPointer, + offset); + do_directive(tokenise(directive)); + offset += size; + + /* Move to the next argument in the list */ + tline = tline->next; + if (tline && tline->type == TOK_WHITESPACE) + tline = tline->next; + } + while (tline && tline->type == TOK_OTHER + && tline->text[0] == ','); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_LOCAL: + /* TASM like LOCAL directive to define local variables for a + * function, in the following form: + * + * LOCAL local1:WORD, local2:DWORD, local4:QWORD = LocalSize + * + * The '= LocalSize' at the end is ignored by NASM, but is + * required by TASM to define the local parameter size (and used + * by the TASM macro package). + */ + offset = LocalOffset; + do + { + char *local, directive[256]; + int size = StackSize; + + /* Find the argument name */ + tline = tline->next; + if (tline && tline->type == TOK_WHITESPACE) + tline = tline->next; + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, + "`%%local' missing argument parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + local = tline->text; + + /* Find the argument size type */ + tline = tline->next; + if (!tline || tline->type != TOK_OTHER + || tline->text[0] != ':') + { + error(ERR_NONFATAL, + "Syntax error processing `%%local' directive"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + tline = tline->next; + if (!tline || tline->type != TOK_ID) + { + error(ERR_NONFATAL, + "`%%local' missing size type parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + /* Allow macro expansion of type parameter */ + tt = tokenise(tline->text); + tt = expand_smacro(tt); + if (nasm_stricmp(tt->text, "byte") == 0) + { + size = MAX(StackSize, 1); + } + else if (nasm_stricmp(tt->text, "word") == 0) + { + size = MAX(StackSize, 2); + } + else if (nasm_stricmp(tt->text, "dword") == 0) + { + size = MAX(StackSize, 4); + } + else if (nasm_stricmp(tt->text, "qword") == 0) + { + 
size = MAX(StackSize, 8); + } + else if (nasm_stricmp(tt->text, "tword") == 0) + { + size = MAX(StackSize, 10); + } + else + { + error(ERR_NONFATAL, + "Invalid size type for `%%local' missing directive"); + free_tlist(tt); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + free_tlist(tt); + + /* Now define the macro for the argument */ + sprintf(directive, "%%define %s (%s-%d)", local, StackPointer, + offset); + do_directive(tokenise(directive)); + offset += size; + + /* Now define the assign to setup the enter_c macro correctly */ + sprintf(directive, "%%assign %%$localsize %%$localsize+%d", + size); + do_directive(tokenise(directive)); + + /* Move to the next argument in the list */ + tline = tline->next; + if (tline && tline->type == TOK_WHITESPACE) + tline = tline->next; + } + while (tline && tline->type == TOK_OTHER + && tline->text[0] == ','); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_CLEAR: + if (tline->next) + error(ERR_WARNING, + "trailing garbage after `%%clear' ignored"); + for (j = 0; j < NHASH; j++) + { + while (mmacros[j]) + { + MMacro *m = mmacros[j]; + mmacros[j] = m->next; + free_mmacro(m); + } + while (smacros[j]) + { + SMacro *s = smacros[j]; + smacros[j] = smacros[j]->next; + nasm_free(s->name); + free_tlist(s->expansion); + nasm_free(s); + } + } + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_INCLUDE: + tline = tline->next; + skip_white_(tline); + if (!tline || (tline->type != TOK_STRING && + tline->type != TOK_INTERNAL_STRING)) + { + error(ERR_NONFATAL, "`%%include' expects a file name"); + free_tlist(origline); + return DIRECTIVE_FOUND; /* but we did _something_ */ + } + if (tline->next) + error(ERR_WARNING, + "trailing garbage after `%%include' ignored"); + if (tline->type != TOK_INTERNAL_STRING) + { + p = tline->text + 1; /* point past the quote to the name */ + p[strlen(p) - 1] = '\0'; /* remove the trailing quote */ + } + else + p = tline->text; /* internal_string is easier */ + 
expand_macros_in_string(&p); + inc = nasm_malloc(sizeof(Include)); + inc->next = istk; + inc->conds = NULL; + inc->fp = inc_fopen(p); + inc->fname = src_set_fname(p); + inc->lineno = src_set_linnum(0); + inc->lineinc = 1; + inc->expansion = NULL; + inc->mstk = NULL; + istk = inc; + list->uplevel(LIST_INCLUDE); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_PUSH: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tok_type_(tline, TOK_ID)) + { + error(ERR_NONFATAL, "`%%push' expects a context identifier"); + free_tlist(origline); + return DIRECTIVE_FOUND; /* but we did _something_ */ + } + if (tline->next) + error(ERR_WARNING, "trailing garbage after `%%push' ignored"); + ctx = nasm_malloc(sizeof(Context)); + ctx->next = cstk; + ctx->localmac = NULL; + ctx->name = nasm_strdup(tline->text); + ctx->number = unique++; + cstk = ctx; + free_tlist(origline); + break; + + case PP_REPL: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tok_type_(tline, TOK_ID)) + { + error(ERR_NONFATAL, "`%%repl' expects a context identifier"); + free_tlist(origline); + return DIRECTIVE_FOUND; /* but we did _something_ */ + } + if (tline->next) + error(ERR_WARNING, "trailing garbage after `%%repl' ignored"); + if (!cstk) + error(ERR_NONFATAL, "`%%repl': context stack is empty"); + else + { + nasm_free(cstk->name); + cstk->name = nasm_strdup(tline->text); + } + free_tlist(origline); + break; + + case PP_POP: + if (tline->next) + error(ERR_WARNING, "trailing garbage after `%%pop' ignored"); + if (!cstk) + error(ERR_NONFATAL, + "`%%pop': context stack is already empty"); + else + ctx_pop(); + free_tlist(origline); + break; + + case PP_ERROR: + tline->next = expand_smacro(tline->next); + tline = tline->next; + skip_white_(tline); + if (tok_type_(tline, TOK_STRING)) + { + p = tline->text + 1; /* point past the quote to the name */ + p[strlen(p) - 1] = '\0'; /* remove the trailing quote */ + expand_macros_in_string(&p); + 
error(ERR_NONFATAL, "%s", p); + nasm_free(p); + } + else + { + p = detoken(tline, FALSE); + error(ERR_WARNING, "%s", p); + nasm_free(p); + } + free_tlist(origline); + break; + + case PP_IF: + case PP_IFCTX: + case PP_IFDEF: + case PP_IFID: + case PP_IFIDN: + case PP_IFIDNI: + case PP_IFMACRO: + case PP_IFNCTX: + case PP_IFNDEF: + case PP_IFNID: + case PP_IFNIDN: + case PP_IFNIDNI: + case PP_IFNMACRO: + case PP_IFNNUM: + case PP_IFNSTR: + case PP_IFNUM: + case PP_IFSTR: + if (istk->conds && !emitting(istk->conds->state)) + j = COND_NEVER; + else + { + j = if_condition(tline->next, i); + tline->next = NULL; /* it got freed */ + free_tlist(origline); + j = j < 0 ? COND_NEVER : j ? COND_IF_TRUE : COND_IF_FALSE; + } + cond = nasm_malloc(sizeof(Cond)); + cond->next = istk->conds; + cond->state = j; + istk->conds = cond; + return DIRECTIVE_FOUND; + + case PP_ELIF: + case PP_ELIFCTX: + case PP_ELIFDEF: + case PP_ELIFID: + case PP_ELIFIDN: + case PP_ELIFIDNI: + case PP_ELIFMACRO: + case PP_ELIFNCTX: + case PP_ELIFNDEF: + case PP_ELIFNID: + case PP_ELIFNIDN: + case PP_ELIFNIDNI: + case PP_ELIFNMACRO: + case PP_ELIFNNUM: + case PP_ELIFNSTR: + case PP_ELIFNUM: + case PP_ELIFSTR: + if (!istk->conds) + error(ERR_FATAL, "`%s': no matching `%%if'", directives[i]); + if (emitting(istk->conds->state) + || istk->conds->state == COND_NEVER) + istk->conds->state = COND_NEVER; + else + { + /* + * IMPORTANT: In the case of %if, we will already have + * called expand_mmac_params(); however, if we're + * processing an %elif we must have been in a + * non-emitting mode, which would have inhibited + * the normal invocation of expand_mmac_params(). Therefore, + * we have to do it explicitly here. + */ + j = if_condition(expand_mmac_params(tline->next), i); + tline->next = NULL; /* it got freed */ + free_tlist(origline); + istk->conds->state = + j < 0 ? COND_NEVER : j ? 
COND_IF_TRUE : COND_IF_FALSE; + } + return DIRECTIVE_FOUND; + + case PP_ELSE: + if (tline->next) + error(ERR_WARNING, "trailing garbage after `%%else' ignored"); + if (!istk->conds) + error(ERR_FATAL, "`%%else': no matching `%%if'"); + if (emitting(istk->conds->state) + || istk->conds->state == COND_NEVER) + istk->conds->state = COND_ELSE_FALSE; + else + istk->conds->state = COND_ELSE_TRUE; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_ENDIF: + if (tline->next) + error(ERR_WARNING, + "trailing garbage after `%%endif' ignored"); + if (!istk->conds) + error(ERR_FATAL, "`%%endif': no matching `%%if'"); + cond = istk->conds; + istk->conds = cond->next; + nasm_free(cond); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_MACRO: + case PP_IMACRO: + if (defining) + error(ERR_FATAL, + "`%%%smacro': already defining a macro", + (i == PP_IMACRO ? "i" : "")); + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tok_type_(tline, TOK_ID)) + { + error(ERR_NONFATAL, + "`%%%smacro' expects a macro name", + (i == PP_IMACRO ? "i" : "")); + return DIRECTIVE_FOUND; + } + defining = nasm_malloc(sizeof(MMacro)); + defining->name = nasm_strdup(tline->text); + defining->casesense = (i == PP_MACRO); + defining->plus = FALSE; + defining->nolist = FALSE; + defining->in_progress = FALSE; + defining->rep_nest = NULL; + tline = expand_smacro(tline->next); + skip_white_(tline); + if (!tok_type_(tline, TOK_NUMBER)) + { + error(ERR_NONFATAL, + "`%%%smacro' expects a parameter count", + (i == PP_IMACRO ? 
"i" : "")); + defining->nparam_min = defining->nparam_max = 0; + } + else + { + defining->nparam_min = defining->nparam_max = + readnum(tline->text, &j); + if (j) + error(ERR_NONFATAL, + "unable to parse parameter count `%s'", + tline->text); + } + if (tline && tok_is_(tline->next, "-")) + { + tline = tline->next->next; + if (tok_is_(tline, "*")) + defining->nparam_max = INT_MAX; + else if (!tok_type_(tline, TOK_NUMBER)) + error(ERR_NONFATAL, + "`%%%smacro' expects a parameter count after `-'", + (i == PP_IMACRO ? "i" : "")); + else + { + defining->nparam_max = readnum(tline->text, &j); + if (j) + error(ERR_NONFATAL, + "unable to parse parameter count `%s'", + tline->text); + if (defining->nparam_min > defining->nparam_max) + error(ERR_NONFATAL, + "minimum parameter count exceeds maximum"); + } + } + if (tline && tok_is_(tline->next, "+")) + { + tline = tline->next; + defining->plus = TRUE; + } + if (tline && tok_type_(tline->next, TOK_ID) && + !nasm_stricmp(tline->next->text, ".nolist")) + { + tline = tline->next; + defining->nolist = TRUE; + } + mmac = mmacros[hash(defining->name)]; + while (mmac) + { + if (!strcmp(mmac->name, defining->name) && + (mmac->nparam_min <= defining->nparam_max + || defining->plus) + && (defining->nparam_min <= mmac->nparam_max + || mmac->plus)) + { + error(ERR_WARNING, + "redefining multi-line macro `%s'", + defining->name); + break; + } + mmac = mmac->next; + } + /* + * Handle default parameters. 
+ */ + if (tline && tline->next) + { + defining->dlist = tline->next; + tline->next = NULL; + count_mmac_params(defining->dlist, &defining->ndefs, + &defining->defaults); + } + else + { + defining->dlist = NULL; + defining->defaults = NULL; + } + defining->expansion = NULL; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_ENDM: + case PP_ENDMACRO: + if (!defining) + { + error(ERR_NONFATAL, "`%s': not defining a macro", + tline->text); + return DIRECTIVE_FOUND; + } + k = hash(defining->name); + defining->next = mmacros[k]; + mmacros[k] = defining; + defining = NULL; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_ROTATE: + if (tline->next && tline->next->type == TOK_WHITESPACE) + tline = tline->next; + if (tline->next == NULL) + { + free_tlist(origline); + error(ERR_NONFATAL, "`%%rotate' missing rotate count"); + return DIRECTIVE_FOUND; + } + t = expand_smacro(tline->next); + tline->next = NULL; + free_tlist(origline); + tline = t; + tptr = &t; + tokval.t_type = TOKEN_INVALID; + evalresult = + evaluate(ppscan, tptr, &tokval, NULL, pass, error, NULL); + free_tlist(tline); + if (!evalresult) + return DIRECTIVE_FOUND; + if (tokval.t_type) + error(ERR_WARNING, + "trailing garbage after expression ignored"); + if (!is_simple(evalresult)) + { + error(ERR_NONFATAL, "non-constant value given to `%%rotate'"); + return DIRECTIVE_FOUND; + } + mmac = istk->mstk; + while (mmac && !mmac->name) /* avoid mistaking %reps for macros */ + mmac = mmac->next_active; + if (!mmac) + { + error(ERR_NONFATAL, + "`%%rotate' invoked outside a macro call"); + } + else if (mmac->nparam == 0) + { + error(ERR_NONFATAL, + "`%%rotate' invoked within macro without parameters"); + } + else + { + mmac->rotate = mmac->rotate + reloc_value(evalresult); + + if (mmac->rotate < 0) + mmac->rotate = + mmac->nparam - (-mmac->rotate) % mmac->nparam; + mmac->rotate %= mmac->nparam; + } + return DIRECTIVE_FOUND; + + case PP_REP: + nolist = FALSE; + tline = tline->next; + if (tline->next 
&& tline->next->type == TOK_WHITESPACE) + tline = tline->next; + if (tline->next && tline->next->type == TOK_ID && + !nasm_stricmp(tline->next->text, ".nolist")) + { + tline = tline->next; + nolist = TRUE; + } + t = expand_smacro(tline->next); + tline->next = NULL; + free_tlist(origline); + tline = t; + tptr = &t; + tokval.t_type = TOKEN_INVALID; + evalresult = + evaluate(ppscan, tptr, &tokval, NULL, pass, error, NULL); + free_tlist(tline); + if (!evalresult) + return DIRECTIVE_FOUND; + if (tokval.t_type) + error(ERR_WARNING, + "trailing garbage after expression ignored"); + if (!is_simple(evalresult)) + { + error(ERR_NONFATAL, "non-constant value given to `%%rep'"); + return DIRECTIVE_FOUND; + } + tmp_defining = defining; + defining = nasm_malloc(sizeof(MMacro)); + defining->name = NULL; /* flags this macro as a %rep block */ + defining->casesense = 0; + defining->plus = FALSE; + defining->nolist = nolist; + defining->in_progress = reloc_value(evalresult) + 1; + defining->nparam_min = defining->nparam_max = 0; + defining->defaults = NULL; + defining->dlist = NULL; + defining->expansion = NULL; + defining->next_active = istk->mstk; + defining->rep_nest = tmp_defining; + return DIRECTIVE_FOUND; + + case PP_ENDREP: + if (!defining || defining->name) + { + error(ERR_NONFATAL, "`%%endrep': no matching `%%rep'"); + return DIRECTIVE_FOUND; + } + + /* + * Now we have a "macro" defined - although it has no name + * and we won't be entering it in the hash tables - we must + * push a macro-end marker for it on to istk->expansion. + * After that, it will take care of propagating itself (a + * macro-end marker line for a macro which is really a %rep + * block will cause the macro to be re-expanded, complete + * with another macro-end marker to ensure the process + * continues) until the whole expansion is forcibly removed + * from istk->expansion by a %exitrep. 
+ */ + l = nasm_malloc(sizeof(Line)); + l->next = istk->expansion; + l->finishes = defining; + l->first = NULL; + istk->expansion = l; + + istk->mstk = defining; + + list->uplevel(defining->nolist ? LIST_MACRO_NOLIST : LIST_MACRO); + tmp_defining = defining; + defining = defining->rep_nest; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_EXITREP: + /* + * We must search along istk->expansion until we hit a + * macro-end marker for a macro with no name. Then we set + * its `in_progress' flag to 0. + */ + for (l = istk->expansion; l; l = l->next) + if (l->finishes && !l->finishes->name) + break; + + if (l) + l->finishes->in_progress = 0; + else + error(ERR_NONFATAL, "`%%exitrep' not within `%%rep' block"); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_XDEFINE: + case PP_IXDEFINE: + case PP_DEFINE: + case PP_IDEFINE: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, + "`%%%s%sdefine' expects a macro identifier", + ((i == PP_IDEFINE || i == PP_IXDEFINE) ? "i" : ""), + ((i == PP_XDEFINE || i == PP_IXDEFINE) ? "x" : "")); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + ctx = get_ctx(tline->text, FALSE); + if (!ctx) + smhead = &smacros[hash(tline->text)]; + else + smhead = &ctx->localmac; + mname = tline->text; + last = tline; + param_start = tline = tline->next; + nparam = 0; + + /* Expand the macro definition now for %xdefine and %ixdefine */ + if ((i == PP_XDEFINE) || (i == PP_IXDEFINE)) + tline = expand_smacro(tline); + + if (tok_is_(tline, "(")) + { + /* + * This macro has parameters. 
+ */ + + tline = tline->next; + while (1) + { + skip_white_(tline); + if (!tline) + { + error(ERR_NONFATAL, "parameter identifier expected"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + if (tline->type != TOK_ID) + { + error(ERR_NONFATAL, + "`%s': parameter identifier expected", + tline->text); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + tline->type = TOK_SMAC_PARAM + nparam++; + tline = tline->next; + skip_white_(tline); + if (tok_is_(tline, ",")) + { + tline = tline->next; + continue; + } + if (!tok_is_(tline, ")")) + { + error(ERR_NONFATAL, + "`)' expected to terminate macro template"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + break; + } + last = tline; + tline = tline->next; + } + if (tok_type_(tline, TOK_WHITESPACE)) + last = tline, tline = tline->next; + macro_start = NULL; + last->next = NULL; + t = tline; + while (t) + { + if (t->type == TOK_ID) + { + for (tt = param_start; tt; tt = tt->next) + if (tt->type >= TOK_SMAC_PARAM && + !strcmp(tt->text, t->text)) + t->type = tt->type; + } + tt = t->next; + t->next = macro_start; + macro_start = t; + t = tt; + } + /* + * Good. We now have a macro name, a parameter count, and a + * token list (in reverse order) for an expansion. We ought + * to be OK just to create an SMacro, store it, and let + * free_tlist have the rest of the line (which we have + * carefully re-terminated after chopping off the expansion + * from the end). + */ + if (smacro_defined(ctx, mname, nparam, &smac, i == PP_DEFINE)) + { + if (!smac) + { + error(ERR_WARNING, + "single-line macro `%s' defined both with and" + " without parameters", mname); + free_tlist(origline); + free_tlist(macro_start); + return DIRECTIVE_FOUND; + } + else + { + /* + * We're redefining, so we have to take over an + * existing SMacro structure. This means freeing + * what was already in it. 
+ */ + nasm_free(smac->name); + free_tlist(smac->expansion); + } + } + else + { + smac = nasm_malloc(sizeof(SMacro)); + smac->next = *smhead; + *smhead = smac; + } + smac->name = nasm_strdup(mname); + smac->casesense = ((i == PP_DEFINE) || (i == PP_XDEFINE)); + smac->nparam = nparam; + smac->expansion = macro_start; + smac->in_progress = FALSE; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_UNDEF: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, "`%%undef' expects a macro identifier"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + if (tline->next) + { + error(ERR_WARNING, + "trailing garbage after macro name ignored"); + } + + /* Find the context that symbol belongs to */ + ctx = get_ctx(tline->text, FALSE); + if (!ctx) + smhead = &smacros[hash(tline->text)]; + else + smhead = &ctx->localmac; + + mname = tline->text; + last = tline; + last->next = NULL; + + /* + * We now have a macro name... go hunt for it. 
+ */ + while (smacro_defined(ctx, mname, -1, &smac, 1)) + { + /* Defined, so we need to find its predecessor and nuke it */ + SMacro **s; + for (s = smhead; *s && *s != smac; s = &(*s)->next); + if (*s) + { + *s = smac->next; + nasm_free(smac->name); + free_tlist(smac->expansion); + nasm_free(smac); + } + } + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_STRLEN: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, + "`%%strlen' expects a macro identifier as first parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + ctx = get_ctx(tline->text, FALSE); + if (!ctx) + smhead = &smacros[hash(tline->text)]; + else + smhead = &ctx->localmac; + mname = tline->text; + last = tline; + tline = expand_smacro(tline->next); + last->next = NULL; + + t = tline; + while (tok_type_(t, TOK_WHITESPACE)) + t = t->next; + /* t should now point to the string */ + if (t->type != TOK_STRING) + { + error(ERR_NONFATAL, + "`%%strlen` requires string as second parameter"); + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + macro_start = nasm_malloc(sizeof(*macro_start)); + macro_start->next = NULL; + make_tok_num(macro_start, strlen(t->text) - 2); + macro_start->mac = NULL; + + /* + * We now have a macro name, an implicit parameter count of + * zero, and a numeric token to use as an expansion. Create + * and store an SMacro. + */ + if (smacro_defined(ctx, mname, 0, &smac, i == PP_STRLEN)) + { + if (!smac) + error(ERR_WARNING, + "single-line macro `%s' defined both with and" + " without parameters", mname); + else + { + /* + * We're redefining, so we have to take over an + * existing SMacro structure. This means freeing + * what was already in it. 
+ */ + nasm_free(smac->name); + free_tlist(smac->expansion); + } + } + else + { + smac = nasm_malloc(sizeof(SMacro)); + smac->next = *smhead; + *smhead = smac; + } + smac->name = nasm_strdup(mname); + smac->casesense = (i == PP_STRLEN); + smac->nparam = 0; + smac->expansion = macro_start; + smac->in_progress = FALSE; + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_SUBSTR: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, + "`%%substr' expects a macro identifier as first parameter"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + ctx = get_ctx(tline->text, FALSE); + if (!ctx) + smhead = &smacros[hash(tline->text)]; + else + smhead = &ctx->localmac; + mname = tline->text; + last = tline; + tline = expand_smacro(tline->next); + last->next = NULL; + + t = tline->next; + while (tok_type_(t, TOK_WHITESPACE)) + t = t->next; + + /* t should now point to the string */ + if (t->type != TOK_STRING) + { + error(ERR_NONFATAL, + "`%%substr` requires string as second parameter"); + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + tt = t->next; + tptr = &tt; + tokval.t_type = TOKEN_INVALID; + evalresult = + evaluate(ppscan, tptr, &tokval, NULL, pass, error, NULL); + if (!evalresult) + { + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + if (!is_simple(evalresult)) + { + error(ERR_NONFATAL, "non-constant value given to `%%substr`"); + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + macro_start = nasm_malloc(sizeof(*macro_start)); + macro_start->next = NULL; + macro_start->text = nasm_strdup("'''"); + if (evalresult->value > 0 + && evalresult->value < strlen(t->text) - 1) + { + macro_start->text[1] = t->text[evalresult->value]; + } + else + { + macro_start->text[2] = '\0'; + } + macro_start->type = 
TOK_STRING; + macro_start->mac = NULL; + + /* + * We now have a macro name, an implicit parameter count of + * zero, and a numeric token to use as an expansion. Create + * and store an SMacro. + */ + if (smacro_defined(ctx, mname, 0, &smac, i == PP_SUBSTR)) + { + if (!smac) + error(ERR_WARNING, + "single-line macro `%s' defined both with and" + " without parameters", mname); + else + { + /* + * We're redefining, so we have to take over an + * existing SMacro structure. This means freeing + * what was already in it. + */ + nasm_free(smac->name); + free_tlist(smac->expansion); + } + } + else + { + smac = nasm_malloc(sizeof(SMacro)); + smac->next = *smhead; + *smhead = smac; + } + smac->name = nasm_strdup(mname); + smac->casesense = (i == PP_SUBSTR); + smac->nparam = 0; + smac->expansion = macro_start; + smac->in_progress = FALSE; + free_tlist(tline); + free_tlist(origline); + return DIRECTIVE_FOUND; + + + case PP_ASSIGN: + case PP_IASSIGN: + tline = tline->next; + skip_white_(tline); + tline = expand_id(tline); + if (!tline || (tline->type != TOK_ID && + (tline->type != TOK_PREPROC_ID || + tline->text[1] != '$'))) + { + error(ERR_NONFATAL, + "`%%%sassign' expects a macro identifier", + (i == PP_IASSIGN ? "i" : "")); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + ctx = get_ctx(tline->text, FALSE); + if (!ctx) + smhead = &smacros[hash(tline->text)]; + else + smhead = &ctx->localmac; + mname = tline->text; + last = tline; + tline = expand_smacro(tline->next); + last->next = NULL; + + t = tline; + tptr = &t; + tokval.t_type = TOKEN_INVALID; + evalresult = + evaluate(ppscan, tptr, &tokval, NULL, pass, error, NULL); + free_tlist(tline); + if (!evalresult) + { + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + if (tokval.t_type) + error(ERR_WARNING, + "trailing garbage after expression ignored"); + + if (!is_simple(evalresult)) + { + error(ERR_NONFATAL, + "non-constant value given to `%%%sassign'", + (i == PP_IASSIGN ? 
"i" : "")); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + + macro_start = nasm_malloc(sizeof(*macro_start)); + macro_start->next = NULL; + make_tok_num(macro_start, reloc_value(evalresult)); + macro_start->mac = NULL; + + /* + * We now have a macro name, an implicit parameter count of + * zero, and a numeric token to use as an expansion. Create + * and store an SMacro. + */ + if (smacro_defined(ctx, mname, 0, &smac, i == PP_ASSIGN)) + { + if (!smac) + error(ERR_WARNING, + "single-line macro `%s' defined both with and" + " without parameters", mname); + else + { + /* + * We're redefining, so we have to take over an + * existing SMacro structure. This means freeing + * what was already in it. + */ + nasm_free(smac->name); + free_tlist(smac->expansion); + } + } + else + { + smac = nasm_malloc(sizeof(SMacro)); + smac->next = *smhead; + *smhead = smac; + } + smac->name = nasm_strdup(mname); + smac->casesense = (i == PP_ASSIGN); + smac->nparam = 0; + smac->expansion = macro_start; + smac->in_progress = FALSE; + free_tlist(origline); + return DIRECTIVE_FOUND; + + case PP_LINE: + /* + * Syntax is `%line nnn[+mmm] [filename]' + */ + tline = tline->next; + skip_white_(tline); + if (!tok_type_(tline, TOK_NUMBER)) + { + error(ERR_NONFATAL, "`%%line' expects line number"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + k = readnum(tline->text, &j); + m = 1; + tline = tline->next; + if (tok_is_(tline, "+")) + { + tline = tline->next; + if (!tok_type_(tline, TOK_NUMBER)) + { + error(ERR_NONFATAL, "`%%line' expects line increment"); + free_tlist(origline); + return DIRECTIVE_FOUND; + } + m = readnum(tline->text, &j); + tline = tline->next; + } + skip_white_(tline); + src_set_linnum(k); + istk->lineinc = m; + if (tline) + { + nasm_free(src_set_fname(detoken(tline, FALSE))); + } + free_tlist(origline); + return DIRECTIVE_FOUND; + + default: + error(ERR_FATAL, + "preprocessor directive `%s' not yet implemented", + directives[i]); + break; + } + return 
DIRECTIVE_FOUND; +} + +/* + * Ensure that a macro parameter contains a condition code and + * nothing else. Return the condition code index if so, or -1 + * otherwise. + */ +static int +find_cc(Token * t) +{ + Token *tt; + int i, j, k, m; + + skip_white_(t); + if (t->type != TOK_ID) + return -1; + tt = t->next; + skip_white_(tt); + if (tt && (tt->type != TOK_OTHER || strcmp(tt->text, ","))) + return -1; + + i = -1; + j = elements(conditions); + while (j - i > 1) + { + k = (j + i) / 2; + m = nasm_stricmp(t->text, conditions[k]); + if (m == 0) + { + i = k; + j = -2; + break; + } + else if (m < 0) + { + j = k; + } + else + i = k; + } + if (j != -2) + return -1; + return i; +} + +/* + * Expand MMacro-local things: parameter references (%0, %n, %+n, + * %-n) and MMacro-local identifiers (%%foo). + */ +static Token * +expand_mmac_params(Token * tline) +{ + Token *t, *tt, **tail, *thead; + + tail = &thead; + thead = NULL; + + while (tline) + { + if (tline->type == TOK_PREPROC_ID && + (((tline->text[1] == '+' || tline->text[1] == '-') + && tline->text[2]) || tline->text[1] == '%' + || (tline->text[1] >= '0' && tline->text[1] <= '9'))) + { + char *text = NULL; + int type = 0, cc; /* type = 0 to placate optimisers */ + char tmpbuf[30]; + int n, i; + MMacro *mac; + + t = tline; + tline = tline->next; + + mac = istk->mstk; + while (mac && !mac->name) /* avoid mistaking %reps for macros */ + mac = mac->next_active; + if (!mac) + error(ERR_NONFATAL, "`%s': not in a macro call", t->text); + else + switch (t->text[1]) + { + /* + * We have to make a substitution of one of the + * forms %1, %-1, %+1, %%foo, %0. 
+ */ + case '0': + type = TOK_NUMBER; + sprintf(tmpbuf, "%d", mac->nparam); + text = nasm_strdup(tmpbuf); + break; + case '%': + type = TOK_ID; + sprintf(tmpbuf, "..@%lu.", mac->unique); + text = nasm_strcat(tmpbuf, t->text + 2); + break; + case '-': + n = atoi(t->text + 2) - 1; + if (n >= mac->nparam) + tt = NULL; + else + { + if (mac->nparam > 1) + n = (n + mac->rotate) % mac->nparam; + tt = mac->params[n]; + } + cc = find_cc(tt); + if (cc == -1) + { + error(ERR_NONFATAL, + "macro parameter %d is not a condition code", + n + 1); + text = NULL; + } + else + { + type = TOK_ID; + if (inverse_ccs[cc] == -1) + { + error(ERR_NONFATAL, + "condition code `%s' is not invertible", + conditions[cc]); + text = NULL; + } + else + text = + nasm_strdup(conditions[inverse_ccs + [cc]]); + } + break; + case '+': + n = atoi(t->text + 2) - 1; + if (n >= mac->nparam) + tt = NULL; + else + { + if (mac->nparam > 1) + n = (n + mac->rotate) % mac->nparam; + tt = mac->params[n]; + } + cc = find_cc(tt); + if (cc == -1) + { + error(ERR_NONFATAL, + "macro parameter %d is not a condition code", + n + 1); + text = NULL; + } + else + { + type = TOK_ID; + text = nasm_strdup(conditions[cc]); + } + break; + default: + n = atoi(t->text + 1) - 1; + if (n >= mac->nparam) + tt = NULL; + else + { + if (mac->nparam > 1) + n = (n + mac->rotate) % mac->nparam; + tt = mac->params[n]; + } + if (tt) + { + for (i = 0; i < mac->paramlen[n]; i++) + { + *tail = + new_Token(NULL, tt->type, tt->text, + 0); + tail = &(*tail)->next; + tt = tt->next; + } + } + text = NULL; /* we've done it here */ + break; + } + if (!text) + { + delete_Token(t); + } + else + { + *tail = t; + tail = &t->next; + t->type = type; + nasm_free(t->text); + t->text = text; + t->mac = NULL; + } + continue; + } + else + { + t = *tail = tline; + tline = tline->next; + t->mac = NULL; + tail = &t->next; + } + } + *tail = NULL; + t = thead; + for (; t && (tt = t->next) != NULL; t = t->next) + switch (t->type) + { + case TOK_WHITESPACE: + if 
(tt->type == TOK_WHITESPACE) + { + t->next = delete_Token(tt); + } + break; + case TOK_ID: + if (tt->type == TOK_ID || tt->type == TOK_NUMBER) + { + char *tmp = nasm_strcat(t->text, tt->text); + nasm_free(t->text); + t->text = tmp; + t->next = delete_Token(tt); + } + break; + case TOK_NUMBER: + if (tt->type == TOK_NUMBER) + { + char *tmp = nasm_strcat(t->text, tt->text); + nasm_free(t->text); + t->text = tmp; + t->next = delete_Token(tt); + } + break; + } + + return thead; +} + +/* + * Expand all single-line macro calls made in the given line. + * Return the expanded version of the line. The original is deemed + * to be destroyed in the process. (In reality we'll just move + * Tokens from input to output a lot of the time, rather than + * actually bothering to destroy and replicate.) + */ +static Token * +expand_smacro(Token * tline) +{ + Token *t, *tt, *mstart, **tail, *thead; + SMacro *head = NULL, *m; + Token **params; + int *paramsize; + int nparam, sparam, brackets, rescan; + Token *org_tline = tline; + Context *ctx; + char *mname; + + /* + * Trick: we should avoid changing the start token pointer since it can + * be contained in "next" field of other token. Because of this + * we allocate a copy of first token and work with it; at the end of + * routine we copy it back + */ + if (org_tline) + { + tline = + new_Token(org_tline->next, org_tline->type, org_tline->text, + 0); + tline->mac = org_tline->mac; + nasm_free(org_tline->text); + org_tline->text = NULL; + } + + again: + tail = &thead; + thead = NULL; + + while (tline) + { /* main token loop */ + if ((mname = tline->text)) + { + /* if this token is a local macro, look in local context */ + if (tline->type == TOK_ID || tline->type == TOK_PREPROC_ID) + ctx = get_ctx(mname, TRUE); + else + ctx = NULL; + if (!ctx) + head = smacros[hash(mname)]; + else + head = ctx->localmac; + /* + * We've hit an identifier. 
As in is_mmacro below, we first + * check whether the identifier is a single-line macro at + * all, then think about checking for parameters if + * necessary. + */ + for (m = head; m; m = m->next) + if (!mstrcmp(m->name, mname, m->casesense)) + break; + if (m) + { + mstart = tline; + params = NULL; + paramsize = NULL; + if (m->nparam == 0) + { + /* + * Simple case: the macro is parameterless. Discard the + * one token that the macro call took, and push the + * expansion back on the to-do stack. + */ + if (!m->expansion) + { + if (!strcmp("__FILE__", m->name)) + { + long num = 0; + src_get(&num, &(tline->text)); + nasm_quote(&(tline->text)); + tline->type = TOK_STRING; + continue; + } + if (!strcmp("__LINE__", m->name)) + { + nasm_free(tline->text); + make_tok_num(tline, src_get_linnum()); + continue; + } + tline = delete_Token(tline); + continue; + } + } + else + { + /* + * Complicated case: at least one macro with this name + * exists and takes parameters. We must find the + * parameters in the call, count them, find the SMacro + * that corresponds to that form of the macro call, and + * substitute for the parameters when we expand. What a + * pain. + */ + tline = tline->next; + skip_white_(tline); + if (!tok_is_(tline, "(")) + { + /* + * This macro wasn't called with parameters: ignore + * the call. (Behaviour borrowed from gnu cpp.) 
+ */ + tline = mstart; + m = NULL; + } + else + { + int paren = 0; + int white = 0; + brackets = 0; + nparam = 0; + tline = tline->next; + sparam = PARAM_DELTA; + params = nasm_malloc(sparam * sizeof(Token *)); + params[0] = tline; + paramsize = nasm_malloc(sparam * sizeof(int)); + paramsize[0] = 0; + for (;; tline = tline->next) + { /* parameter loop */ + if (!tline) + { + error(ERR_NONFATAL, + "macro call expects terminating `)'"); + break; + } + if (tline->type == TOK_WHITESPACE + && brackets <= 0) + { + if (paramsize[nparam]) + white++; + else + params[nparam] = tline->next; + continue; /* parameter loop */ + } + if (tline->type == TOK_OTHER + && tline->text[1] == 0) + { + char ch = tline->text[0]; + if (ch == ',' && !paren && brackets <= 0) + { + if (++nparam >= sparam) + { + sparam += PARAM_DELTA; + params = nasm_realloc(params, + sparam * sizeof(Token *)); + paramsize = nasm_realloc(paramsize, + sparam * sizeof(int)); + } + params[nparam] = tline->next; + paramsize[nparam] = 0; + white = 0; + continue; /* parameter loop */ + } + if (ch == '{' && + (brackets > 0 || (brackets == 0 && + !paramsize[nparam]))) + { + if (!(brackets++)) + { + params[nparam] = tline->next; + continue; /* parameter loop */ + } + } + if (ch == '}' && brackets > 0) + if (--brackets == 0) + { + brackets = -1; + continue; /* parameter loop */ + } + if (ch == '(' && !brackets) + paren++; + if (ch == ')' && brackets <= 0) + if (--paren < 0) + break; + } + if (brackets < 0) + { + brackets = 0; + error(ERR_NONFATAL, "braces do not " + "enclose all of macro parameter"); + } + paramsize[nparam] += white + 1; + white = 0; + } /* parameter loop */ + nparam++; + while (m && (m->nparam != nparam || + mstrcmp(m->name, mname, + m->casesense))) + m = m->next; + if (!m) + error(ERR_WARNING | ERR_WARN_MNP, + "macro `%s' exists, " + "but not taking %d parameters", + mstart->text, nparam); + } + } + if (m && m->in_progress) + m = NULL; + if (!m) /* in progess or didn't find '(' or wrong nparam */ + { + 
/* + * Design question: should we handle !tline, which + * indicates missing ')' here, or expand those + * macros anyway, which requires the (t) test a few + * lines down? + */ + nasm_free(params); + nasm_free(paramsize); + tline = mstart; + } + else + { + /* + * Expand the macro: we are placed on the last token of the + * call, so that we can easily split the call from the + * following tokens. We also start by pushing an SMAC_END + * token for the cycle removal. + */ + t = tline; + if (t) + { + tline = t->next; + t->next = NULL; + } + tt = new_Token(tline, TOK_SMAC_END, NULL, 0); + tt->mac = m; + m->in_progress = TRUE; + tline = tt; + for (t = m->expansion; t; t = t->next) + { + if (t->type >= TOK_SMAC_PARAM) + { + Token *pcopy = tline, **ptail = &pcopy; + Token *ttt, *pt; + int i; + + ttt = params[t->type - TOK_SMAC_PARAM]; + for (i = paramsize[t->type - TOK_SMAC_PARAM]; + --i >= 0;) + { + pt = *ptail = + new_Token(tline, ttt->type, ttt->text, + 0); + ptail = &pt->next; + ttt = ttt->next; + } + tline = pcopy; + } + else + { + tt = new_Token(tline, t->type, t->text, 0); + tline = tt; + } + } + + /* + * Having done that, get rid of the macro call, and clean + * up the parameters. + */ + nasm_free(params); + nasm_free(paramsize); + free_tlist(mstart); + continue; /* main token loop */ + } + } + } + + if (tline->type == TOK_SMAC_END) + { + tline->mac->in_progress = FALSE; + tline = delete_Token(tline); + } + else + { + t = *tail = tline; + tline = tline->next; + t->mac = NULL; + t->next = NULL; + tail = &t->next; + } + } + + /* + * Now scan the entire line and look for successive TOK_IDs that resulted + * after expansion (they can't be produced by tokenise()). The successive + * TOK_IDs should be concatenated. + * Also we look for %+ tokens and concatenate the tokens before and after + * them (without white spaces in between). 
+ */ + t = thead; + rescan = 0; + while (t) + { + while (t && t->type != TOK_ID && t->type != TOK_PREPROC_ID) + t = t->next; + if (!t || !t->next) + break; + if (t->next->type == TOK_ID || + t->next->type == TOK_PREPROC_ID || + t->next->type == TOK_NUMBER) + { + char *p = nasm_strcat(t->text, t->next->text); + nasm_free(t->text); + t->next = delete_Token(t->next); + t->text = p; + rescan = 1; + } + else if (t->next->type == TOK_WHITESPACE && t->next->next && + t->next->next->type == TOK_PREPROC_ID && + strcmp(t->next->next->text, "%+") == 0) + { + /* free the next whitespace, the %+ token and next whitespace */ + int i; + for (i = 1; i <= 3; i++) + { + if (!t->next || (i != 2 && t->next->type != TOK_WHITESPACE)) + break; + t->next = delete_Token(t->next); + } /* endfor */ + } + else + t = t->next; + } + /* If we concatenaded something, re-scan the line for macros */ + if (rescan) + { + tline = thead; + goto again; + } + + if (org_tline) + { + if (thead) + { + *org_tline = *thead; + /* since we just gave text to org_line, don't free it */ + thead->text = NULL; + delete_Token(thead); + } + else + { + /* the expression expanded to empty line; + we can't return NULL for some reasons + we just set the line to a single WHITESPACE token. */ + memset(org_tline, 0, sizeof(*org_tline)); + org_tline->text = NULL; + org_tline->type = TOK_WHITESPACE; + } + thead = org_tline; + } + + return thead; +} + +/* + * Similar to expand_smacro but used exclusively with macro identifiers + * right before they are fetched in. The reason is that there can be + * identifiers consisting of several subparts. We consider that if there + * are more than one element forming the name, user wants a expansion, + * otherwise it will be left as-is. Example: + * + * %define %$abc cde + * + * the identifier %$abc will be left as-is so that the handler for %define + * will suck it and define the corresponding value. 
Other case: + * + * %define _%$abc cde + * + * In this case user wants name to be expanded *before* %define starts + * working, so we'll expand %$abc into something (if it has a value; + * otherwise it will be left as-is) then concatenate all successive + * PP_IDs into one. + */ +static Token * +expand_id(Token * tline) +{ + Token *cur, *oldnext = NULL; + + if (!tline || !tline->next) + return tline; + + cur = tline; + while (cur->next && + (cur->next->type == TOK_ID || + cur->next->type == TOK_PREPROC_ID || cur->next->type == TOK_NUMBER)) + cur = cur->next; + + /* If identifier consists of just one token, don't expand */ + if (cur == tline) + return tline; + + if (cur) + { + oldnext = cur->next; /* Detach the tail past identifier */ + cur->next = NULL; /* so that expand_smacro stops here */ + } + + tline = expand_smacro(tline); + + if (cur) + { + /* expand_smacro possibly changhed tline; re-scan for EOL */ + cur = tline; + while (cur && cur->next) + cur = cur->next; + if (cur) + cur->next = oldnext; + } + + return tline; +} + +/* + * Determine whether the given line constitutes a multi-line macro + * call, and return the MMacro structure called if so. Doesn't have + * to check for an initial label - that's taken care of in + * expand_mmacro - but must check numbers of parameters. Guaranteed + * to be called with tline->type == TOK_ID, so the putative macro + * name is easy to find. + */ +static MMacro * +is_mmacro(Token * tline, Token *** params_array) +{ + MMacro *head, *m; + Token **params; + int nparam; + + head = mmacros[hash(tline->text)]; + + /* + * Efficiency: first we see if any macro exists with the given + * name. If not, we can return NULL immediately. _Then_ we + * count the parameters, and then we look further along the + * list if necessary to find the proper MMacro. + */ + for (m = head; m; m = m->next) + if (!mstrcmp(m->name, tline->text, m->casesense)) + break; + if (!m) + return NULL; + + /* + * OK, we have a potential macro. 
Count and demarcate the + * parameters. + */ + count_mmac_params(tline->next, &nparam, ¶ms); + + /* + * So we know how many parameters we've got. Find the MMacro + * structure that handles this number. + */ + while (m) + { + if (m->nparam_min <= nparam && (m->plus || nparam <= m->nparam_max)) + { + /* + * This one is right. Just check if cycle removal + * prohibits us using it before we actually celebrate... + */ + if (m->in_progress) + { +#if 0 + error(ERR_NONFATAL, + "self-reference in multi-line macro `%s'", m->name); +#endif + nasm_free(params); + return NULL; + } + /* + * It's right, and we can use it. Add its default + * parameters to the end of our list if necessary. + */ + if (m->defaults && nparam < m->nparam_min + m->ndefs) + { + params = + nasm_realloc(params, + ((m->nparam_min + m->ndefs + 1) * sizeof(*params))); + while (nparam < m->nparam_min + m->ndefs) + { + params[nparam] = m->defaults[nparam - m->nparam_min]; + nparam++; + } + } + /* + * If we've gone over the maximum parameter count (and + * we're in Plus mode), ignore parameters beyond + * nparam_max. + */ + if (m->plus && nparam > m->nparam_max) + nparam = m->nparam_max; + /* + * Then terminate the parameter list, and leave. + */ + if (!params) + { /* need this special case */ + params = nasm_malloc(sizeof(*params)); + nparam = 0; + } + params[nparam] = NULL; + *params_array = params; + return m; + } + /* + * This one wasn't right: look for the next one with the + * same name. + */ + for (m = m->next; m; m = m->next) + if (!mstrcmp(m->name, tline->text, m->casesense)) + break; + } + + /* + * After all that, we didn't find one with the right number of + * parameters. Issue a warning, and fail to expand the macro. + */ + error(ERR_WARNING | ERR_WARN_MNP, + "macro `%s' exists, but not taking %d parameters", + tline->text, nparam); + nasm_free(params); + return NULL; +} + +/* + * Expand the multi-line macro call made by the given line, if + * there is one to be expanded. 
If there is, push the expansion on + * istk->expansion and return 1. Otherwise return 0. + */ +static int +expand_mmacro(Token * tline) +{ + Token *startline = tline; + Token *label = NULL; + int dont_prepend = 0; + Token **params, *t, *tt; + MMacro *m; + Line *l, *ll; + int i, nparam, *paramlen; + + t = tline; + skip_white_(t); +/* if (!tok_type_(t, TOK_ID)) Lino 02/25/02 */ + if (!tok_type_(t, TOK_ID) && !tok_type_(t, TOK_PREPROC_ID)) + return 0; + m = is_mmacro(t, ¶ms); + if (!m) + { + Token *last; + /* + * We have an id which isn't a macro call. We'll assume + * it might be a label; we'll also check to see if a + * colon follows it. Then, if there's another id after + * that lot, we'll check it again for macro-hood. + */ + label = last = t; + t = t->next; + if (tok_type_(t, TOK_WHITESPACE)) + last = t, t = t->next; + if (tok_is_(t, ":")) + { + dont_prepend = 1; + last = t, t = t->next; + if (tok_type_(t, TOK_WHITESPACE)) + last = t, t = t->next; + } + if (!tok_type_(t, TOK_ID) || (m = is_mmacro(t, ¶ms)) == NULL) + return 0; + last->next = NULL; + tline = t; + } + + /* + * Fix up the parameters: this involves stripping leading and + * trailing whitespace, then stripping braces if they are + * present. + */ + for (nparam = 0; params[nparam]; nparam++) + ; + paramlen = nparam ? nasm_malloc(nparam * sizeof(*paramlen)) : NULL; + + for (i = 0; params[i]; i++) + { + int brace = FALSE; + int comma = (!m->plus || i < nparam - 1); + + t = params[i]; + skip_white_(t); + if (tok_is_(t, "{")) + t = t->next, brace = TRUE, comma = FALSE; + params[i] = t; + paramlen[i] = 0; + while (t) + { + if (comma && t->type == TOK_OTHER && !strcmp(t->text, ",")) + break; /* ... because we have hit a comma */ + if (comma && t->type == TOK_WHITESPACE && tok_is_(t->next, ",")) + break; /* ... or a space then a comma */ + if (brace && t->type == TOK_OTHER && !strcmp(t->text, "}")) + break; /* ... 
or a brace */ + t = t->next; + paramlen[i]++; + } + } + + /* + * OK, we have a MMacro structure together with a set of + * parameters. We must now go through the expansion and push + * copies of each Line on to istk->expansion. Substitution of + * parameter tokens and macro-local tokens doesn't get done + * until the single-line macro substitution process; this is + * because delaying them allows us to change the semantics + * later through %rotate. + * + * First, push an end marker on to istk->expansion, mark this + * macro as in progress, and set up its invocation-specific + * variables. + */ + ll = nasm_malloc(sizeof(Line)); + ll->next = istk->expansion; + ll->finishes = m; + ll->first = NULL; + istk->expansion = ll; + + m->in_progress = TRUE; + m->params = params; + m->iline = tline; + m->nparam = nparam; + m->rotate = 0; + m->paramlen = paramlen; + m->unique = unique++; + m->lineno = 0; + + m->next_active = istk->mstk; + istk->mstk = m; + + for (l = m->expansion; l; l = l->next) + { + Token **tail; + + ll = nasm_malloc(sizeof(Line)); + ll->finishes = NULL; + ll->next = istk->expansion; + istk->expansion = ll; + tail = &ll->first; + + for (t = l->first; t; t = t->next) + { + Token *x = t; + if (t->type == TOK_PREPROC_ID && + t->text[1] == '0' && t->text[2] == '0') + { + dont_prepend = -1; + x = label; + if (!x) + continue; + } + tt = *tail = new_Token(NULL, x->type, x->text, 0); + tail = &tt->next; + } + *tail = NULL; + } + + /* + * If we had a label, push it on as the first line of + * the macro expansion. + */ + if (label) + { + if (dont_prepend < 0) + free_tlist(startline); + else + { + ll = nasm_malloc(sizeof(Line)); + ll->finishes = NULL; + ll->next = istk->expansion; + istk->expansion = ll; + ll->first = startline; + if (!dont_prepend) + { + while (label->next) + label = label->next; + label->next = tt = new_Token(NULL, TOK_OTHER, ":", 0); + } + } + } + + list->uplevel(m->nolist ? 
LIST_MACRO_NOLIST : LIST_MACRO); + + return 1; +} + +/* + * Since preprocessor always operate only on the line that didn't + * arrived yet, we should always use ERR_OFFBY1. Also since user + * won't want to see same error twice (preprocessing is done once + * per pass) we will want to show errors only during pass one. + */ +static void +error(int severity, const char *fmt, ...) +{ + va_list arg; + char buff[1024]; + + /* If we're in a dead branch of IF or something like it, ignore the error */ + if (istk && istk->conds && !emitting(istk->conds->state)) + return; + + va_start(arg, fmt); + vsprintf(buff, fmt, arg); + va_end(arg); + + if (istk && istk->mstk && istk->mstk->name) + _error(severity | ERR_PASS1, "(%s:%d) %s", istk->mstk->name, + istk->mstk->lineno, buff); + else + _error(severity | ERR_PASS1, "%s", buff); +} + +static void +pp_reset(char *file, int apass, efunc errfunc, evalfunc eval, + ListGen * listgen) +{ + int h; + + _error = errfunc; + cstk = NULL; + istk = nasm_malloc(sizeof(Include)); + istk->next = NULL; + istk->conds = NULL; + istk->expansion = NULL; + istk->mstk = NULL; + istk->fp = fopen(file, "r"); + istk->fname = NULL; + src_set_fname(nasm_strdup(file)); + src_set_linnum(0); + istk->lineinc = 1; + if (!istk->fp) + error(ERR_FATAL | ERR_NOFILE, "unable to open input file `%s'", file); + defining = NULL; + for (h = 0; h < NHASH; h++) + { + mmacros[h] = NULL; + smacros[h] = NULL; + } + unique = 0; + if (tasm_compatible_mode) { + stdmacpos = stdmac; + } else { + stdmacpos = &stdmac[TASM_MACRO_COUNT]; + } + any_extrastdmac = (extrastdmac != NULL); + list = listgen; + evaluate = eval; + pass = apass; +} + +static char * +pp_getline(void) +{ + char *line; + Token *tline; + + while (1) + { + /* + * Fetch a tokenised line, either from the macro-expansion + * buffer or from the input file. 
+ */ + tline = NULL; + while (istk->expansion && istk->expansion->finishes) + { + Line *l = istk->expansion; + if (!l->finishes->name && l->finishes->in_progress > 1) + { + Line *ll; + + /* + * This is a macro-end marker for a macro with no + * name, which means it's not really a macro at all + * but a %rep block, and the `in_progress' field is + * more than 1, meaning that we still need to + * repeat. (1 means the natural last repetition; 0 + * means termination by %exitrep.) We have + * therefore expanded up to the %endrep, and must + * push the whole block on to the expansion buffer + * again. We don't bother to remove the macro-end + * marker: we'd only have to generate another one + * if we did. + */ + l->finishes->in_progress--; + for (l = l->finishes->expansion; l; l = l->next) + { + Token *t, *tt, **tail; + + ll = nasm_malloc(sizeof(Line)); + ll->next = istk->expansion; + ll->finishes = NULL; + ll->first = NULL; + tail = &ll->first; + + for (t = l->first; t; t = t->next) + { + if (t->text || t->type == TOK_WHITESPACE) + { + tt = *tail = new_Token(NULL, t->type, t->text, 0); + tail = &tt->next; + } + } + + istk->expansion = ll; + } + } + else + { + /* + * Check whether a `%rep' was started and not ended + * within this macro expansion. This can happen and + * should be detected. It's a fatal error because + * I'm too confused to work out how to recover + * sensibly from it. + */ + if (defining) + { + if (defining->name) + error(ERR_PANIC, "defining with name in expansion"); + else if (istk->mstk->name) + error(ERR_FATAL, "`%%rep' without `%%endrep' within" + " expansion of macro `%s'", istk->mstk->name); + } + + /* + * FIXME: investigate the relationship at this point between + * istk->mstk and l->finishes + */ + { + MMacro *m = istk->mstk; + istk->mstk = m->next_active; + if (m->name) + { + /* + * This was a real macro call, not a %rep, and + * therefore the parameter information needs to + * be freed. 
+ */ + nasm_free(m->params); + free_tlist(m->iline); + nasm_free(m->paramlen); + l->finishes->in_progress = FALSE; + } + else + free_mmacro(m); + } + istk->expansion = l->next; + nasm_free(l); + list->downlevel(LIST_MACRO); + } + } + while (1) + { /* until we get a line we can use */ + + if (istk->expansion) + { /* from a macro expansion */ + char *p; + Line *l = istk->expansion; + if (istk->mstk) + istk->mstk->lineno++; + tline = l->first; + istk->expansion = l->next; + nasm_free(l); + p = detoken(tline, FALSE); + list->line(LIST_MACRO, p); + nasm_free(p); + break; + } + line = read_line(); + if (line) + { /* from the current input file */ + line = prepreproc(line); + tline = tokenise(line); + nasm_free(line); + break; + } + /* + * The current file has ended; work down the istk + */ + { + Include *i = istk; + fclose(i->fp); + if (i->conds) + error(ERR_FATAL, "expected `%%endif' before end of file"); + /* only set line and file name if there's a next node */ + if (i->next) + { + src_set_linnum(i->lineno); + nasm_free(src_set_fname(i->fname)); + } + istk = i->next; + list->downlevel(LIST_INCLUDE); + nasm_free(i); + if (!istk) + return NULL; + } + } + + /* + * We must expand MMacro parameters and MMacro-local labels + * _before_ we plunge into directive processing, to cope + * with things like `%define something %1' such as STRUC + * uses. Unless we're _defining_ a MMacro, in which case + * those tokens should be left alone to go into the + * definition; and unless we're in a non-emitting + * condition, in which case we don't want to meddle with + * anything. + */ + if (!defining && !(istk->conds && !emitting(istk->conds->state))) + tline = expand_mmac_params(tline); + + /* + * Check the line to see if it's a preprocessor directive. + */ + if (do_directive(tline) == DIRECTIVE_FOUND) + { + continue; + } + else if (defining) + { + /* + * We're defining a multi-line macro. We emit nothing + * at all, and just + * shove the tokenised line on to the macro definition. 
+ */ + Line *l = nasm_malloc(sizeof(Line)); + l->next = defining->expansion; + l->first = tline; + l->finishes = FALSE; + defining->expansion = l; + continue; + } + else if (istk->conds && !emitting(istk->conds->state)) + { + /* + * We're in a non-emitting branch of a condition block. + * Emit nothing at all, not even a blank line: when we + * emerge from the condition we'll give a line-number + * directive so we keep our place correctly. + */ + free_tlist(tline); + continue; + } + else if (istk->mstk && !istk->mstk->in_progress) + { + /* + * We're in a %rep block which has been terminated, so + * we're walking through to the %endrep without + * emitting anything. Emit nothing at all, not even a + * blank line: when we emerge from the %rep block we'll + * give a line-number directive so we keep our place + * correctly. + */ + free_tlist(tline); + continue; + } + else + { + tline = expand_smacro(tline); + if (!expand_mmacro(tline)) + { + /* + * De-tokenise the line again, and emit it. + */ + line = detoken(tline, TRUE); + free_tlist(tline); + break; + } + else + { + continue; /* expand_mmacro calls free_tlist */ + } + } + } + + return line; +} + +static void +pp_cleanup(int pass) +{ + int h; + + if (defining) + { + error(ERR_NONFATAL, "end of file while still defining macro `%s'", + defining->name); + free_mmacro(defining); + } + while (cstk) + ctx_pop(); + for (h = 0; h < NHASH; h++) + { + while (mmacros[h]) + { + MMacro *m = mmacros[h]; + mmacros[h] = mmacros[h]->next; + free_mmacro(m); + } + while (smacros[h]) + { + SMacro *s = smacros[h]; + smacros[h] = smacros[h]->next; + nasm_free(s->name); + free_tlist(s->expansion); + nasm_free(s); + } + } + while (istk) + { + Include *i = istk; + istk = istk->next; + fclose(i->fp); + nasm_free(i->fname); + nasm_free(i); + } + while (cstk) + ctx_pop(); + if (pass == 0) + { + free_llist(predef); + delete_Blocks(); + } +} + +void +pp_include_path(char *path) +{ + IncPath *i; + + i = nasm_malloc(sizeof(IncPath)); + i->path = 
nasm_strdup(path); + i->next = ipath; + ipath = i; +} + +void +pp_pre_include(char *fname) +{ + Token *inc, *space, *name; + Line *l; + + name = new_Token(NULL, TOK_INTERNAL_STRING, fname, 0); + space = new_Token(name, TOK_WHITESPACE, NULL, 0); + inc = new_Token(space, TOK_PREPROC_ID, "%include", 0); + + l = nasm_malloc(sizeof(Line)); + l->next = predef; + l->first = inc; + l->finishes = FALSE; + predef = l; +} + +void +pp_pre_define(char *definition) +{ + Token *def, *space; + Line *l; + char *equals; + + equals = strchr(definition, '='); + space = new_Token(NULL, TOK_WHITESPACE, NULL, 0); + def = new_Token(space, TOK_PREPROC_ID, "%define", 0); + if (equals) + *equals = ' '; + space->next = tokenise(definition); + if (equals) + *equals = '='; + + l = nasm_malloc(sizeof(Line)); + l->next = predef; + l->first = def; + l->finishes = FALSE; + predef = l; +} + +void +pp_pre_undefine(char *definition) +{ + Token *def, *space; + Line *l; + + space = new_Token(NULL, TOK_WHITESPACE, NULL, 0); + def = new_Token(space, TOK_PREPROC_ID, "%undef", 0); + + l = nasm_malloc(sizeof(Line)); + l->next = predef; + l->first = def; + l->finishes = FALSE; + predef = l; +} + +void +pp_extra_stdmac(const char **macros) +{ + extrastdmac = macros; +} + +static void +make_tok_num(Token * tok, long val) +{ + char numbuf[20]; + sprintf(numbuf, "%ld", val); + tok->text = nasm_strdup(numbuf); + tok->type = TOK_NUMBER; +} + +Preproc nasmpp = { + pp_reset, + pp_getline, + pp_cleanup +}; diff --git a/src/preprocs/nasm/nasm-pp.h b/src/preprocs/nasm/nasm-pp.h new file mode 100644 index 00000000..0b7df114 --- /dev/null +++ b/src/preprocs/nasm/nasm-pp.h @@ -0,0 +1,20 @@ +/* preproc.h header file for preproc.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#ifndef NASM_PREPROC_H +#define NASM_PREPROC_H + +void pp_include_path (char *); +void pp_pre_include (char *); +void pp_pre_define (char *); +void pp_pre_undefine (char *); +void pp_extra_stdmac (const char **); + +extern Preproc nasmpp; + +#endif diff --git a/src/preprocs/nasm/nasm.h b/src/preprocs/nasm/nasm.h new file mode 100644 index 00000000..8f4f293c --- /dev/null +++ b/src/preprocs/nasm/nasm.h @@ -0,0 +1,850 @@ +/* nasm.h main header file for the Netwide Assembler: inter-module interface + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * initial version: 27/iii/95 by Simon Tatham + */ + +#ifndef NASM_NASM_H +#define NASM_NASM_H + +#include +#include "version.h" /* generated NASM version macros */ + +#ifndef NULL +#define NULL 0 +#endif + +#ifndef FALSE +#define FALSE 0 /* comes in handy */ +#endif +#ifndef TRUE +#define TRUE 1 +#endif + +#define NO_SEG -1L /* null segment value */ +#define SEG_ABS 0x40000000L /* mask for far-absolute segments */ + +#ifndef FILENAME_MAX +#define FILENAME_MAX 256 +#endif + +#ifndef PREFIX_MAX +#define PREFIX_MAX 10 +#endif + +#ifndef POSTFIX_MAX +#define POSTFIX_MAX 10 +#endif + + + +/* + * Name pollution problems: on Digital UNIX pulls in some + * strange hardware header file which sees fit to define R_SP. We + * undefine it here so as not to break the enum below. + */ +#ifdef R_SP +#undef R_SP +#endif + +/* + * We must declare the existence of this structure type up here, + * since we have to reference it before we define it... + */ +struct ofmt; + +/* + * ------------------------- + * Error reporting functions + * ------------------------- + */ + +/* + * An error reporting function should look like this. 
+ */ +typedef void (*efunc) (int severity, const char *fmt, ...); + +/* + * These are the error severity codes which get passed as the first + * argument to an efunc. + */ + +#define ERR_DEBUG 0x00000008 /* put out debugging message */ +#define ERR_WARNING 0x00000000 /* warn only: no further action */ +#define ERR_NONFATAL 0x00000001 /* terminate assembly after phase */ +#define ERR_FATAL 0x00000002 /* instantly fatal: exit with error */ +#define ERR_PANIC 0x00000003 /* internal error: panic instantly + * and dump core for reference */ +#define ERR_MASK 0x0000000F /* mask off the above codes */ +#define ERR_NOFILE 0x00000010 /* don't give source file name/line */ +#define ERR_USAGE 0x00000020 /* print a usage message */ +#define ERR_PASS1 0x00000040 /* only print this error on pass one */ + +/* + * These codes define specific types of suppressible warning. + */ + +#define ERR_WARN_MASK 0x0000FF00 /* the mask for this feature */ +#define ERR_WARN_SHR 8 /* how far to shift right */ + +#define ERR_WARN_MNP 0x00000100 /* macro-num-parameters warning */ +#define ERR_WARN_MSR 0x00000200 /* macro self-reference */ +#define ERR_WARN_OL 0x00000300 /* orphan label (no colon, and + * alone on line) */ +#define ERR_WARN_NOV 0x00000400 /* numeric overflow */ +#define ERR_WARN_GNUELF 0x00000500 /* using GNU ELF extensions */ +#define ERR_WARN_MAX 5 /* the highest numbered one */ + +/* + * ----------------------- + * Other function typedefs + * ----------------------- + */ + +/* + * A label-lookup function should look like this. + */ +typedef int (*lfunc) (char *label, long *segment, long *offset); + +/* + * And a label-definition function like this. The boolean parameter + * `is_norm' states whether the label is a `normal' label (which + * should affect the local-label system), or something odder like + * an EQU or a segment-base symbol, which shouldn't. 
+ */ +typedef void (*ldfunc) (char *label, long segment, long offset, char *special, + int is_norm, int isextrn, struct ofmt *ofmt, + efunc error); + +/* + * List-file generators should look like this: + */ +typedef struct { + /* + * Called to initialise the listing file generator. Before this + * is called, the other routines will silently do nothing when + * called. The `char *' parameter is the file name to write the + * listing to. + */ + void (*init) (char *, efunc); + + /* + * Called to clear stuff up and close the listing file. + */ + void (*cleanup) (void); + + /* + * Called to output binary data. Parameters are: the offset; + * the data; the data type. Data types are similar to the + * output-format interface, only OUT_ADDRESS will _always_ be + * displayed as if it's relocatable, so ensure that any non- + * relocatable address has been converted to OUT_RAWDATA by + * then. Note that OUT_RAWDATA+0 is a valid data type, and is a + * dummy call used to give the listing generator an offset to + * work with when doing things like uplevel(LIST_TIMES) or + * uplevel(LIST_INCBIN). + */ + void (*output) (long, const void *, unsigned long); + + /* + * Called to send a text line to the listing generator. The + * `int' parameter is LIST_READ or LIST_MACRO depending on + * whether the line came directly from an input file or is the + * result of a multi-line macro expansion. + */ + void (*line) (int, char *); + + /* + * Called to change one of the various levelled mechanisms in + * the listing generator. LIST_INCLUDE and LIST_MACRO can be + * used to increase the nesting level of include files and + * macro expansions; LIST_TIMES and LIST_INCBIN switch on the + * two binary-output-suppression mechanisms for large-scale + * pseudo-instructions. + * + * LIST_MACRO_NOLIST is synonymous with LIST_MACRO except that + * it indicates the beginning of the expansion of a `nolist' + * macro, so anything under that level won't be expanded unless + * it includes another file. 
+ */ + void (*uplevel) (int); + + /* + * Reverse the effects of uplevel. + */ + void (*downlevel) (int); +} ListGen; + +/* + * The expression evaluator must be passed a scanner function; a + * standard scanner is provided as part of nasmlib.c. The + * preprocessor will use a different one. Scanners, and the + * token-value structures they return, look like this. + * + * The return value from the scanner is always a copy of the + * `t_type' field in the structure. + */ +struct tokenval { + int t_type; + long t_integer, t_inttwo; + char *t_charptr; +}; +typedef int (*scanner) (void *private_data, struct tokenval *tv); + +/* + * Token types returned by the scanner, in addition to ordinary + * ASCII character values, and zero for end-of-string. + */ +enum { /* token types, other than chars */ + TOKEN_INVALID = -1, /* a placeholder value */ + TOKEN_EOS = 0, /* end of string */ + TOKEN_EQ = '=', TOKEN_GT = '>', TOKEN_LT = '<', /* aliases */ + TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, /* major token types */ + TOKEN_ERRNUM, /* numeric constant with error in */ + TOKEN_HERE, TOKEN_BASE, /* $ and $$ */ + TOKEN_SPECIAL, /* BYTE, WORD, DWORD, FAR, NEAR, etc */ + TOKEN_PREFIX, /* A32, O16, LOCK, REPNZ, TIMES, etc */ + TOKEN_SHL, TOKEN_SHR, /* << and >> */ + TOKEN_SDIV, TOKEN_SMOD, /* // and %% */ + TOKEN_GE, TOKEN_LE, TOKEN_NE, /* >=, <= and <> (!= is same as <>) */ + TOKEN_DBL_AND, TOKEN_DBL_OR, TOKEN_DBL_XOR, /* &&, || and ^^ */ + TOKEN_SEG, TOKEN_WRT, /* SEG and WRT */ + TOKEN_FLOAT /* floating-point constant */ +}; + +typedef struct { + long segment; + long offset; + int known; +} loc_t; + +/* + * Expression-evaluator datatype. Expressions, within the + * evaluator, are stored as an array of these beasts, terminated by + * a record with type==0. Mostly, it's a vector type: each type + * denotes some kind of a component, and the value denotes the + * multiple of that component present in the expression. 
The + * exception is the WRT type, whose `value' field denotes the + * segment to which the expression is relative. These segments will + * be segment-base types, i.e. either odd segment values or SEG_ABS + * types. So it is still valid to assume that anything with a + * `value' field of zero is insignificant. + */ +typedef struct { + long type; /* a register, or EXPR_xxx */ + long value; /* must be >= 32 bits */ +} expr; + +/* + * The evaluator can also return hints about which of two registers + * used in an expression should be the base register. See also the + * `operand' structure. + */ +struct eval_hints { + int base; + int type; +}; + +/* + * The actual expression evaluator function looks like this. When + * called, it expects the first token of its expression to already + * be in `*tv'; if it is not, set tv->t_type to TOKEN_INVALID and + * it will start by calling the scanner. + * + * If a forward reference happens during evaluation, the evaluator + * must set `*fwref' to TRUE if `fwref' is non-NULL. + * + * `critical' is non-zero if the expression may not contain forward + * references. The evaluator will report its own error if this + * occurs; if `critical' is 1, the error will be "symbol not + * defined before use", whereas if `critical' is 2, the error will + * be "symbol undefined". + * + * If `critical' has bit 8 set (in addition to its main value: 0x101 + * and 0x102 correspond to 1 and 2) then an extended expression + * syntax is recognised, in which relational operators such as =, < + * and >= are accepted, as well as low-precedence logical operators + * &&, ^^ and ||. + * + * If `hints' is non-NULL, it gets filled in with some hints as to + * the base register in complex effective addresses. + */ +#define CRITICAL 0x100 +typedef expr *(*evalfunc) (scanner sc, void *scprivate, struct tokenval *tv, + int *fwref, int critical, efunc error, + struct eval_hints *hints); + +/* + * Special values for expr->type. 
ASSUMPTION MADE HERE: the number + * of distinct register names (i.e. possible "type" fields for an + * expr structure) does not exceed 124 (EXPR_REG_START through + * EXPR_REG_END). + */ +#define EXPR_REG_START 1 +#define EXPR_REG_END 124 +#define EXPR_UNKNOWN 125L /* for forward references */ +#define EXPR_SIMPLE 126L +#define EXPR_WRT 127L +#define EXPR_SEGBASE 128L + +/* + * Preprocessors ought to look like this: + */ +typedef struct { + /* + * Called at the start of a pass; given a file name, the number + * of the pass, an error reporting function, an evaluator + * function, and a listing generator to talk to. + */ + void (*reset) (char *, int, efunc, evalfunc, ListGen *); + + /* + * Called to fetch a line of preprocessed source. The line + * returned has been malloc'ed, and so should be freed after + * use. + */ + char *(*getline) (void); + + /* + * Called at the end of a pass. + */ + void (*cleanup) (int); +} Preproc; + +/* + * ---------------------------------------------------------------- + * Some lexical properties of the NASM source language, included + * here because they are shared between the parser and preprocessor + * ---------------------------------------------------------------- + */ + +/* + * isidstart matches any character that may start an identifier, and isidchar + * matches any character that may appear at places other than the start of an + * identifier. E.g. a period may only appear at the start of an identifier + * (for local labels), whereas a number may appear anywhere *but* at the + * start. + */ + +#define isidstart(c) ( isalpha(c) || (c)=='_' || (c)=='.' || (c)=='?' \ + || (c)=='@' ) +#define isidchar(c) ( isidstart(c) || isdigit(c) || (c)=='$' || (c)=='#' \ + || (c)=='~' ) + +/* Ditto for numeric constants. */ + +#define isnumstart(c) ( isdigit(c) || (c)=='$' ) +#define isnumchar(c) ( isalnum(c) ) + +/* This returns the numeric value of a given 'digit'. */ + +#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? 
(c)-'A'+10 : (c)-'0') + +/* + * Data-type flags that get passed to listing-file routines. + */ +enum { + LIST_READ, LIST_MACRO, LIST_MACRO_NOLIST, LIST_INCLUDE, + LIST_INCBIN, LIST_TIMES +}; + +/* + * ----------------------------------------------------------- + * Format of the `insn' structure returned from `parser.c' and + * passed into `assemble.c' + * ----------------------------------------------------------- + */ + +/* + * Here we define the operand types. These are implemented as bit + * masks, since some are subsets of others; e.g. AX in a MOV + * instruction is a special operand type, whereas AX in other + * contexts is just another 16-bit register. (Also, consider CL in + * shift instructions, DX in OUT, etc.) + */ + +/* size, and other attributes, of the operand */ +#define BITS8 0x00000001L +#define BITS16 0x00000002L +#define BITS32 0x00000004L +#define BITS64 0x00000008L /* FPU only */ +#define BITS80 0x00000010L /* FPU only */ +#define FAR 0x00000020L /* grotty: this means 16:16 or */ + /* 16:32, like in CALL/JMP */ +#define NEAR 0x00000040L +#define SHORT 0x00000080L /* and this means what it says :) */ + +#define SIZE_MASK 0x000000FFL /* all the size attributes */ +#define NON_SIZE (~SIZE_MASK) + +#define TO 0x00000100L /* reverse effect in FADD, FSUB &c */ +#define COLON 0x00000200L /* operand is followed by a colon */ +#define STRICT 0x00000400L /* do not optimize this operand */ + +/* type of operand: memory reference, register, etc. 
*/ +#define MEMORY 0x00204000L +#define REGISTER 0x00001000L /* register number in 'basereg' */ +#define IMMEDIATE 0x00002000L + +#define REGMEM 0x00200000L /* for r/m, ie EA, operands */ +#define REGNORM 0x00201000L /* 'normal' reg, qualifies as EA */ +#define REG8 0x00201001L +#define REG16 0x00201002L +#define REG32 0x00201004L +#define MMXREG 0x00201008L /* MMX registers */ +#define XMMREG 0x00201010L /* XMM Katmai reg */ +#define FPUREG 0x01000000L /* floating point stack registers */ +#define FPU0 0x01000800L /* FPU stack register zero */ + +/* special register operands: these may be treated differently */ +#define REG_SMASK 0x00070000L /* a mask for the following */ +#define REG_ACCUM 0x00211000L /* accumulator: AL, AX or EAX */ +#define REG_AL 0x00211001L /* REG_ACCUM | BITSxx */ +#define REG_AX 0x00211002L /* ditto */ +#define REG_EAX 0x00211004L /* and again */ +#define REG_COUNT 0x00221000L /* counter: CL, CX or ECX */ +#define REG_CL 0x00221001L /* REG_COUNT | BITSxx */ +#define REG_CX 0x00221002L /* ditto */ +#define REG_ECX 0x00221004L /* another one */ +#define REG_DX 0x00241002L +#define REG_SREG 0x00081002L /* any segment register */ +#define REG_CS 0x01081002L /* CS */ +#define REG_DESS 0x02081002L /* DS, ES, SS (non-CS 86 registers) */ +#define REG_FSGS 0x04081002L /* FS, GS (386 extended registers) */ +#define REG_SEG67 0x08081002L /* Non-implemented segment registers */ +#define REG_CDT 0x00101004L /* CRn, DRn and TRn */ +#define REG_CREG 0x08101004L /* CRn */ +#define REG_DREG 0x10101004L /* DRn */ +#define REG_TREG 0x20101004L /* TRn */ + +/* special type of EA */ +#define MEM_OFFS 0x00604000L /* simple [address] offset */ + +/* special type of immediate operand */ +#define ONENESS 0x00800000L /* so UNITY == IMMEDIATE | ONENESS */ +#define UNITY 0x00802000L /* for shift/rotate instructions */ +#define BYTENESS 0x40000000L /* so SBYTE == IMMEDIATE | BYTENESS */ +#define SBYTE 0x40002000L /* for op r16/32,immediate instrs. 
*/ + +/* Register names automatically generated from regs.dat */ +#include "regs.h" + +enum { /* condition code names */ + C_A, C_AE, C_B, C_BE, C_C, C_E, C_G, C_GE, C_L, C_LE, C_NA, C_NAE, + C_NB, C_NBE, C_NC, C_NE, C_NG, C_NGE, C_NL, C_NLE, C_NO, C_NP, + C_NS, C_NZ, C_O, C_P, C_PE, C_PO, C_S, C_Z +}; + +/* + * Note that because segment registers may be used as instruction + * prefixes, we must ensure the enumerations for prefixes and + * register names do not overlap. + */ +enum { /* instruction prefixes */ + PREFIX_ENUM_START = REG_ENUM_LIMIT, + P_A16 = PREFIX_ENUM_START, P_A32, P_LOCK, P_O16, P_O32, P_REP, P_REPE, + P_REPNE, P_REPNZ, P_REPZ, P_TIMES +}; + +enum { /* extended operand types */ + EOT_NOTHING, EOT_DB_STRING, EOT_DB_NUMBER +}; + +enum { /* special EA flags */ + EAF_BYTEOFFS = 1, /* force offset part to byte size */ + EAF_WORDOFFS = 2, /* force offset part to [d]word size */ + EAF_TIMESTWO = 4 /* really do EAX*2 not EAX+EAX */ +}; + +enum { /* values for `hinttype' */ + EAH_NOHINT = 0, /* no hint at all - our discretion */ + EAH_MAKEBASE = 1, /* try to make given reg the base */ + EAH_NOTBASE = 2 /* try _not_ to make reg the base */ +}; + +typedef struct { /* operand to an instruction */ + long type; /* type of operand */ + int addr_size; /* 0 means default; 16; 32 */ + int basereg, indexreg, scale; /* registers and scale involved */ + int hintbase, hinttype; /* hint as to real base register */ + long segment; /* immediate segment, if needed */ + long offset; /* any immediate number */ + long wrt; /* segment base it's relative to */ + int eaflags; /* special EA flags */ + int opflags; /* see OPFLAG_* defines below */ +} operand; + +#define OPFLAG_FORWARD 1 /* operand is a forward reference */ +#define OPFLAG_EXTERN 2 /* operand is an external reference */ + +typedef struct extop { /* extended operand */ + struct extop *next; /* linked list */ + long type; /* defined above */ + char *stringval; /* if it's a string, then here it is */ + int stringlen; 
/* ... and here's how long it is */ + long segment; /* if it's a number/address, then... */ + long offset; /* ... it's given here ... */ + long wrt; /* ... and here */ +} extop; + +#define MAXPREFIX 4 + +typedef struct { /* an instruction itself */ + char *label; /* the label defined, or NULL */ + int prefixes[MAXPREFIX]; /* instruction prefixes, if any */ + int nprefix; /* number of entries in above */ + int opcode; /* the opcode - not just the string */ + int condition; /* the condition code, if Jcc/SETcc */ + int operands; /* how many operands? 0-3 + * (more if db et al) */ + operand oprs[3]; /* the operands, defined as above */ + extop *eops; /* extended operands */ + int eops_float; /* true if DD and floating */ + long times; /* repeat count (TIMES prefix) */ + int forw_ref; /* is there a forward reference? */ +} insn; + +enum geninfo { GI_SWITCH }; +/* + * ------------------------------------------------------------ + * The data structure defining an output format driver, and the + * interfaces to the functions therein. + * ------------------------------------------------------------ + */ + +struct ofmt { + /* + * This is a short (one-liner) description of the type of + * output generated by the driver. + */ + const char *fullname; + + /* + * This is a single keyword used to select the driver. + */ + const char *shortname; + + /* + * this is reserved for out module specific help. + * It is set to NULL in all the out modules but is not implemented + * in the main program + */ + const char *helpstring; + + /* + * this is a pointer to the first element of the debug information + */ + struct dfmt **debug_formats; + + /* + * and a pointer to the element that is being used + * note: this is set to the default at compile time and changed if the + * -F option is selected. 
If developing a set of new debug formats for + * an output format, be sure to set this to whatever default you want + * + */ + struct dfmt *current_dfmt; + + /* + * This, if non-NULL, is a NULL-terminated list of `char *'s + * pointing to extra standard macros supplied by the object + * format (e.g. a sensible initial default value of __SECT__, + * and user-level equivalents for any format-specific + * directives). + */ + const char **stdmac; + + /* + * This procedure is called at the start of an output session. + * It tells the output format what file it will be writing to, + * what routine to report errors through, and how to interface + * to the label manager and expression evaluator if necessary. + * It also gives it a chance to do other initialisation. + */ + void (*init) (FILE *fp, efunc error, ldfunc ldef, evalfunc eval); + + /* + * This procedure is called to pass generic information to the + * object file. The first parameter gives the information type + * (currently only command line switches) + * and the second parameter gives the value. This function returns + * 1 if recognized, 0 if unrecognized + */ + int (*setinfo)(enum geninfo type, char **string); + + /* + * This procedure is called by assemble() to write actual + * generated code or data to the object file. Typically it + * doesn't have to actually _write_ it, just store it for + * later. + * + * The `type' argument specifies the type of output data, and + * usually the size as well: its contents are described below. + */ + void (*output) (long segto, const void *data, unsigned long type, + long segment, long wrt); + + /* + * This procedure is called once for every symbol defined in + * the module being assembled. It gives the name and value of + * the symbol, in NASM's terms, and indicates whether it has + * been declared to be global. 
Note that the parameter "name", + * when passed, will point to a piece of static storage + * allocated inside the label manager - it's safe to keep using + * that pointer, because the label manager doesn't clean up + * until after the output driver has. + * + * Values of `is_global' are: 0 means the symbol is local; 1 + * means the symbol is global; 2 means the symbol is common (in + * which case `offset' holds the _size_ of the variable). + * Anything else is available for the output driver to use + * internally. + * + * This routine explicitly _is_ allowed to call the label + * manager to define further symbols, if it wants to, even + * though it's been called _from_ the label manager. That much + * re-entrancy is guaranteed in the label manager. However, the + * label manager will in turn call this routine, so it should + * be prepared to be re-entrant itself. + * + * The `special' parameter contains special information passed + * through from the command that defined the label: it may have + * been an EXTERN, a COMMON or a GLOBAL. The distinction should + * be obvious to the output format from the other parameters. + */ + void (*symdef) (char *name, long segment, long offset, int is_global, + char *special); + + /* + * This procedure is called when the source code requests a + * segment change. It should return the corresponding segment + * _number_ for the name, or NO_SEG if the name is not a valid + * segment name. + * + * It may also be called with NULL, in which case it is to + * return the _default_ section number for starting assembly in. + * + * It is allowed to modify the string it is given a pointer to. + * + * It is also allowed to specify a default instruction size for + * the segment, by setting `*bits' to 16 or 32. Or, if it + * doesn't wish to define a default, it can leave `bits' alone. 
+ */ + long (*section) (char *name, int pass, int *bits); + + /* + * This procedure is called to modify the segment base values + * returned from the SEG operator. It is given a segment base + * value (i.e. a segment value with the low bit set), and is + * required to produce in return a segment value which may be + * different. It can map segment bases to absolute numbers by + * means of returning SEG_ABS types. + * + * It should return NO_SEG if the segment base cannot be + * determined; the evaluator (which calls this routine) is + * responsible for throwing an error condition if that occurs + * in pass two or in a critical expression. + */ + long (*segbase) (long segment); + + /* + * This procedure is called to allow the output driver to + * process its own specific directives. When called, it has the + * directive word in `directive' and the parameter string in + * `value'. It is called in both assembly passes, and `pass' + * will be either 1 or 2. + * + * This procedure should return zero if it does not _recognise_ + * the directive, so that the main program can report an error. + * If it recognises the directive but then has its own errors, + * it should report them itself and then return non-zero. It + * should also return non-zero if it correctly processes the + * directive. + */ + int (*directive) (char *directive, char *value, int pass); + + /* + * This procedure is called before anything else - even before + * the "init" routine - and is passed the name of the input + * file from which this output file is being generated. It + * should return its preferred name for the output file in + * `outname', if outname[0] is not '\0', and do nothing to + * `outname' otherwise. Since it is called before the driver is + * properly initialised, it has to be passed its error handler + * separately. 
+ * + * This procedure may also take its own copy of the input file + * name for use in writing the output file: it is _guaranteed_ + * that it will be called before the "init" routine. + * + * The parameter `outname' points to an area of storage + * guaranteed to be at least FILENAME_MAX in size. + */ + void (*filename) (char *inname, char *outname, efunc error); + + /* + * This procedure is called after assembly finishes, to allow + * the output driver to clean itself up and free its memory. + * Typically, it will also be the point at which the object + * file actually gets _written_. + * + * One thing the cleanup routine should always do is to close + * the output file pointer. + */ + void (*cleanup) (int debuginfo); +}; + +/* + * values for the `type' parameter to an output function. Each one + * must have the actual number of _bytes_ added to it. + * + * Exceptions are OUT_RELxADR, which denote an x-byte relocation + * which will be a relative jump. For this we need to know the + * distance in bytes from the start of the relocated record until + * the end of the containing instruction. _This_ is what is stored + * in the size part of the parameter, in this case. + * + * Also OUT_RESERVE denotes reservation of N bytes of BSS space, + * and the contents of the "data" parameter is irrelevant. + * + * The "data" parameter for the output function points to a "long", + * containing the address in question, unless the type is + * OUT_RAWDATA, in which case it points to an "unsigned char" + * array. + */ +#define OUT_RAWDATA 0x00000000UL +#define OUT_ADDRESS 0x10000000UL +#define OUT_REL2ADR 0x20000000UL +#define OUT_REL4ADR 0x30000000UL +#define OUT_RESERVE 0x40000000UL +#define OUT_TYPMASK 0xF0000000UL +#define OUT_SIZMASK 0x0FFFFFFFUL + +/* + * ------------------------------------------------------------ + * The data structure defining a debug format driver, and the + * interfaces to the functions therein. 
+ * ------------------------------------------------------------ + */ + +struct dfmt { + + /* + * This is a short (one-liner) description of the type of + * output generated by the driver. + */ + const char *fullname; + + /* + * This is a single keyword used to select the driver. + */ + const char *shortname; + + + /* + * init - called initially to set up local pointer to object format, + * void pointer to implementation defined data, file pointer (which + * probably won't be used, but who knows?), and error function. + */ + void (*init) (struct ofmt * of, void * id, FILE * fp, efunc error); + + /* + * linenum - called any time there is output with a change of + * line number or file. + */ + void (*linenum) (const char * filename, long linenumber, long segto); + + /* + * debug_deflabel - called whenever a label is defined. Parameters + * are the same as to 'symdef()' in the output format. This function + * would be called before the output format version. + */ + + void (*debug_deflabel) (char * name, long segment, long offset, + int is_global, char * special); + /* + * debug_directive - called whenever a DEBUG directive other than 'LINE' + * is encountered. 'directive' contains the first parameter to the + * DEBUG directive, and params contains the rest. For example, + * 'DEBUG VAR _somevar:int' would translate to a call to this + * function with 'directive' equal to "VAR" and 'params' equal to + * "_somevar:int". + */ + void (*debug_directive) (const char * directive, const char * params); + + /* + * typevalue - called whenever the assembler wishes to register a type + * for the last defined label. This routine MUST detect if a type was + * already registered and not re-register it. 
+ */ + void (*debug_typevalue) (long type); + + /* + * debug_output - called whenever output is required + * 'type' is the type of info required, and this is format-specific + */ + void (*debug_output) (int type, void *param); + + /* + * cleanup - called after processing of file is complete + */ + void (*cleanup) (void); + +}; +/* + * The type definition macros + * for debugging + * + * low 3 bits: reserved + * next 5 bits: type + * next 24 bits: number of elements for arrays (0 for labels) + */ + +#define TY_UNKNOWN 0x00 +#define TY_LABEL 0x08 +#define TY_BYTE 0x10 +#define TY_WORD 0x18 +#define TY_DWORD 0x20 +#define TY_FLOAT 0x28 +#define TY_QWORD 0x30 +#define TY_TBYTE 0x38 +#define TY_COMMON 0xE0 +#define TY_SEG 0xE8 +#define TY_EXTERN 0xF0 +#define TY_EQU 0xF8 + +#define TYM_TYPE(x) ((x) & 0xF8) +#define TYM_ELEMENTS(x) (((x) & 0xFFFFFF00) >> 8) + +#define TYS_ELEMENTS(x) ((x) << 8) +/* + * ----- + * Other + * ----- + */ + +/* + * This is a useful #define which I keep meaning to use more often: + * the number of elements of a statically defined array. + */ + +#define elements(x) ( sizeof(x) / sizeof(*(x)) ) + +extern int tasm_compatible_mode; + +/* + * This declaration passes the "pass" number to all other modules + * "pass0" assumes the values: 0, 0, ..., 0, 1, 2 + * where 0 = optimizing pass + * 1 = pass 1 + * 2 = pass 2 + */ + +extern int pass0; /* this is globally known */ +extern int optimizing; + +#endif diff --git a/src/preprocs/nasm/nasmlib.c b/src/preprocs/nasm/nasmlib.c new file mode 100644 index 00000000..bb0ab29e --- /dev/null +++ b/src/preprocs/nasm/nasmlib.c @@ -0,0 +1,1116 @@ +/* nasmlib.c library routines for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" +#include "insns.h" /* For MAX_KEYWORD */ + +static efunc nasm_malloc_error; + +#ifdef LOGALLOC +static FILE *logfp; +#endif + +void nasm_set_malloc_error (efunc error) +{ + nasm_malloc_error = error; +#ifdef LOGALLOC + logfp = fopen ("malloc.log", "w"); + setvbuf (logfp, NULL, _IOLBF, BUFSIZ); + fprintf (logfp, "null pointer is %p\n", NULL); +#endif +} + +#ifdef LOGALLOC +void *nasm_malloc_log (char *file, int line, size_t size) +#else +void *nasm_malloc (size_t size) +#endif +{ + void *p = malloc(size); + if (!p) + nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory"); +#ifdef LOGALLOC + else + fprintf(logfp, "%s %d malloc(%ld) returns %p\n", + file, line, (long)size, p); +#endif + return p; +} + +#ifdef LOGALLOC +void *nasm_realloc_log (char *file, int line, void *q, size_t size) +#else +void *nasm_realloc (void *q, size_t size) +#endif +{ + void *p = q ? realloc(q, size) : malloc(size); + if (!p) + nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory"); +#ifdef LOGALLOC + else if (q) + fprintf(logfp, "%s %d realloc(%p,%ld) returns %p\n", + file, line, q, (long)size, p); + else + fprintf(logfp, "%s %d malloc(%ld) returns %p\n", + file, line, (long)size, p); +#endif + return p; +} + +#ifdef LOGALLOC +void nasm_free_log (char *file, int line, void *q) +#else +void nasm_free (void *q) +#endif +{ + if (q) { + free (q); +#ifdef LOGALLOC + fprintf(logfp, "%s %d free(%p)\n", + file, line, q); +#endif + } +} + +#ifdef LOGALLOC +char *nasm_strdup_log (char *file, int line, const char *s) +#else +char *nasm_strdup (const char *s) +#endif +{ + char *p; + int size = strlen(s)+1; + + p = malloc(size); + if (!p) + nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory"); +#ifdef LOGALLOC + else + fprintf(logfp, "%s %d strdup(%ld) returns %p\n", + file, line, (long)size, p); +#endif + strcpy (p, s); + return p; +} + +#ifdef LOGALLOC +char *nasm_strndup_log (char *file, int 
line, char *s, size_t len) +#else +char *nasm_strndup (char *s, size_t len) +#endif +{ + char *p; + int size = len+1; + + p = malloc(size); + if (!p) + nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory"); +#ifdef LOGALLOC + else + fprintf(logfp, "%s %d strndup(%ld) returns %p\n", + file, line, (long)size, p); +#endif + strncpy (p, s, len); + p[len] = '\0'; + return p; +} + +#if !defined(stricmp) && !defined(strcasecmp) +int nasm_stricmp (const char *s1, const char *s2) +{ + while (*s1 && tolower(*s1) == tolower(*s2)) + s1++, s2++; + if (!*s1 && !*s2) + return 0; + else if (tolower(*s1) < tolower(*s2)) + return -1; + else + return 1; +} +#endif + +#if !defined(strnicmp) && !defined(strncasecmp) +int nasm_strnicmp (const char *s1, const char *s2, int n) +{ + while (n > 0 && *s1 && tolower(*s1) == tolower(*s2)) + s1++, s2++, n--; + if ((!*s1 && !*s2) || n==0) + return 0; + else if (tolower(*s1) < tolower(*s2)) + return -1; + else + return 1; +} +#endif + +#define lib_isnumchar(c) ( isalnum(c) || (c) == '$') +#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0') + +long readnum (char *str, int *error) +{ + char *r = str, *q; + long radix; + unsigned long result, checklimit; + int digit, last; + int warn = FALSE; + int sign = 1; + + *error = FALSE; + + while (isspace(*r)) r++; /* find start of number */ + + /* + * If the number came from make_tok_num (as a result of an %assign), it + * might have a '-' built into it (rather than in a preceeding token). + */ + if (*r == '-') + { + r++; + sign = -1; + } + + q = r; + + while (lib_isnumchar(*q)) q++; /* find end of number */ + + /* + * If it begins 0x, 0X or $, or ends in H, it's in hex. if it + * ends in Q, it's octal. if it ends in B, it's binary. + * Otherwise, it's ordinary decimal. 
+ */ + if (*r=='0' && (r[1]=='x' || r[1]=='X')) + radix = 16, r += 2; + else if (*r=='$') + radix = 16, r++; + else if (q[-1]=='H' || q[-1]=='h') + radix = 16 , q--; + else if (q[-1]=='Q' || q[-1]=='q') + radix = 8 , q--; + else if (q[-1]=='B' || q[-1]=='b') + radix = 2 , q--; + else + radix = 10; + + /* + * If this number has been found for us by something other than + * the ordinary scanners, then it might be malformed by having + * nothing between the prefix and the suffix. Check this case + * now. + */ + if (r >= q) { + *error = TRUE; + return 0; + } + + /* + * `checklimit' must be 2**32 / radix. We can't do that in + * 32-bit arithmetic, which we're (probably) using, so we + * cheat: since we know that all radices we use are even, we + * can divide 2**31 by radix/2 instead. + */ + checklimit = 0x80000000UL / (radix>>1); + + /* + * Calculate the highest allowable value for the last digit + * of a 32 bit constant... in radix 10, it is 6, otherwise it is 0 + */ + last = (radix == 10 ? 
6 : 0); + + result = 0; + while (*r && r < q) { + if (*r<'0' || (*r>'9' && *r<'A') || (digit = numvalue(*r)) >= radix) + { + *error = TRUE; + return 0; + } + if (result > checklimit || + (result == checklimit && digit >= last)) + { + warn = TRUE; + } + + result = radix * result + digit; + r++; + } + + if (warn) + nasm_malloc_error (ERR_WARNING | ERR_PASS1 | ERR_WARN_NOV, + "numeric constant %s does not fit in 32 bits", + str); + + return result*sign; +} + +long readstrnum (char *str, int length, int *warn) +{ + long charconst = 0; + int i; + + *warn = FALSE; + + str += length; + for (i=0; i> 8) & 255), fp); +} + +void fwritelong (long data, FILE *fp) +{ + fputc ((int) (data & 255), fp); + fputc ((int) ((data >> 8) & 255), fp); + fputc ((int) ((data >> 16) & 255), fp); + fputc ((int) ((data >> 24) & 255), fp); +} + +void standard_extension (char *inname, char *outname, char *extension, + efunc error) +{ + char *p, *q; + + if (*outname) /* file name already exists, */ + return; /* so do nothing */ + q = inname; + p = outname; + while (*q) *p++ = *q++; /* copy, and find end of string */ + *p = '\0'; /* terminate it */ + while (p > outname && *--p != '.');/* find final period (or whatever) */ + if (*p != '.') while (*p) p++; /* go back to end if none found */ + if (!strcmp(p, extension)) { /* is the extension already there? */ + if (*extension) + error(ERR_WARNING | ERR_NOFILE, + "file name already ends in `%s': " + "output will be in `nasm.out'", + extension); + else + error(ERR_WARNING | ERR_NOFILE, + "file name already has no extension: " + "output will be in `nasm.out'"); + strcpy(outname, "nasm.out"); + } else + strcpy(p, extension); +} + +#define LEAFSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_LEAF)) +#define BRANCHSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_BRANCH)) + +#define LAYERSIZ(r) ( (r)->layers==0 ? 
RAA_BLKSIZE : RAA_LAYERSIZE ) + +static struct RAA *real_raa_init (int layers) +{ + struct RAA *r; + int i; + + if (layers == 0) { + r = nasm_malloc (LEAFSIZ); + r->layers = 0; + memset (r->u.l.data, 0, sizeof(r->u.l.data)); + r->stepsize = 1L; + } else { + r = nasm_malloc (BRANCHSIZ); + r->layers = layers; + for ( i = 0 ; i < RAA_LAYERSIZE ; i++ ) + r->u.b.data[i] = NULL; + r->stepsize = RAA_BLKSIZE; + while (--layers) + r->stepsize *= RAA_LAYERSIZE; + } + return r; +} + +struct RAA *raa_init (void) +{ + return real_raa_init (0); +} + +void raa_free (struct RAA *r) +{ + if (r->layers == 0) + nasm_free (r); + else { + struct RAA **p; + for (p = r->u.b.data; p - r->u.b.data < RAA_LAYERSIZE; p++) + if (*p) + raa_free (*p); + } +} + +long raa_read (struct RAA *r, long posn) +{ + if (posn >= r->stepsize * LAYERSIZ(r)) + return 0; /* Return 0 for undefined entries */ + while (r->layers > 0) { + ldiv_t l; + l = ldiv (posn, r->stepsize); + r = r->u.b.data[l.quot]; + posn = l.rem; + if (!r) + return 0; /* Return 0 for undefined entries */ + } + return r->u.l.data[posn]; +} + +struct RAA *raa_write (struct RAA *r, long posn, long value) +{ + struct RAA *result; + + if (posn < 0) + nasm_malloc_error (ERR_PANIC, "negative position in raa_write"); + + while (r->stepsize * LAYERSIZ(r) <= posn) { + /* + * Must add a layer. 
+ */ + struct RAA *s; + int i; + + s = nasm_malloc (BRANCHSIZ); + for ( i = 0 ; i < RAA_LAYERSIZE ; i++ ) + s->u.b.data[i] = NULL; + s->layers = r->layers + 1; + s->stepsize = LAYERSIZ(r) * r->stepsize; + s->u.b.data[0] = r; + r = s; + } + + result = r; + + while (r->layers > 0) { + ldiv_t l; + struct RAA **s; + l = ldiv (posn, r->stepsize); + s = &r->u.b.data[l.quot]; + if (!*s) + *s = real_raa_init (r->layers - 1); + r = *s; + posn = l.rem; + } + + r->u.l.data[posn] = value; + + return result; +} + +#define SAA_MAXLEN 8192 + +struct SAA *saa_init (long elem_len) +{ + struct SAA *s; + + if (elem_len > SAA_MAXLEN) + nasm_malloc_error (ERR_PANIC | ERR_NOFILE, "SAA with huge elements"); + + s = nasm_malloc (sizeof(struct SAA)); + s->posn = s->start = 0L; + s->elem_len = elem_len; + s->length = SAA_MAXLEN - (SAA_MAXLEN % elem_len); + s->data = nasm_malloc (s->length); + s->next = NULL; + s->end = s; + + return s; +} + +void saa_free (struct SAA *s) +{ + struct SAA *t; + + while (s) { + t = s->next; + nasm_free (s->data); + nasm_free (s); + s = t; + } +} + +void *saa_wstruct (struct SAA *s) +{ + void *p; + + if (s->end->length - s->end->posn < s->elem_len) { + s->end->next = nasm_malloc (sizeof(struct SAA)); + s->end->next->start = s->end->start + s->end->posn; + s->end = s->end->next; + s->end->length = s->length; + s->end->next = NULL; + s->end->posn = 0L; + s->end->data = nasm_malloc (s->length); + } + + p = s->end->data + s->end->posn; + s->end->posn += s->elem_len; + return p; +} + +void saa_wbytes (struct SAA *s, const void *data, long len) +{ + const char *d = data; + + while (len > 0) { + long l = s->end->length - s->end->posn; + if (l > len) + l = len; + if (l > 0) { + if (d) { + memcpy (s->end->data + s->end->posn, d, l); + d += l; + } else + memset (s->end->data + s->end->posn, 0, l); + s->end->posn += l; + len -= l; + } + if (len > 0) { + s->end->next = nasm_malloc (sizeof(struct SAA)); + s->end->next->start = s->end->start + s->end->posn; + s->end = 
s->end->next; + s->end->length = s->length; + s->end->next = NULL; + s->end->posn = 0L; + s->end->data = nasm_malloc (s->length); + } + } +} + +void saa_rewind (struct SAA *s) +{ + s->rptr = s; + s->rpos = 0L; +} + +void *saa_rstruct (struct SAA *s) +{ + void *p; + + if (!s->rptr) + return NULL; + + if (s->rptr->posn - s->rpos < s->elem_len) { + s->rptr = s->rptr->next; + if (!s->rptr) + return NULL; /* end of array */ + s->rpos = 0L; + } + + p = s->rptr->data + s->rpos; + s->rpos += s->elem_len; + return p; +} + +void *saa_rbytes (struct SAA *s, long *len) +{ + void *p; + + if (!s->rptr) + return NULL; + + p = s->rptr->data + s->rpos; + *len = s->rptr->posn - s->rpos; + s->rptr = s->rptr->next; + s->rpos = 0L; + return p; +} + +void saa_rnbytes (struct SAA *s, void *data, long len) +{ + char *d = data; + + while (len > 0) { + long l; + + if (!s->rptr) + return; + + l = s->rptr->posn - s->rpos; + if (l > len) + l = len; + if (l > 0) { + memcpy (d, s->rptr->data + s->rpos, l); + d += l; + s->rpos += l; + len -= l; + } + if (len > 0) { + s->rptr = s->rptr->next; + s->rpos = 0L; + } + } +} + +void saa_fread (struct SAA *s, long posn, void *data, long len) +{ + struct SAA *p; + long pos; + char *cdata = data; + + if (!s->rptr || posn < s->rptr->start) + saa_rewind (s); + p = s->rptr; + while (posn >= p->start + p->posn) { + p = p->next; + if (!p) + return; /* what else can we do?! */ + } + + pos = posn - p->start; + while (len) { + long l = p->posn - pos; + if (l > len) + l = len; + memcpy (cdata, p->data+pos, l); + len -= l; + cdata += l; + p = p->next; + if (!p) + return; + pos = 0L; + } + s->rptr = p; +} + +void saa_fwrite (struct SAA *s, long posn, void *data, long len) +{ + struct SAA *p; + long pos; + char *cdata = data; + + if (!s->rptr || posn < s->rptr->start) + saa_rewind (s); + p = s->rptr; + while (posn >= p->start + p->posn) { + p = p->next; + if (!p) + return; /* what else can we do?! 
*/ + } + + pos = posn - p->start; + while (len) { + long l = p->posn - pos; + if (l > len) + l = len; + memcpy (p->data+pos, cdata, l); + len -= l; + cdata += l; + p = p->next; + if (!p) + return; + pos = 0L; + } + s->rptr = p; +} + +void saa_fpwrite (struct SAA *s, FILE *fp) +{ + char *data; + long len; + + saa_rewind (s); + while ( (data = saa_rbytes (s, &len)) ) + fwrite (data, 1, len, fp); +} + +/* + * Register, instruction, condition-code and prefix keywords used + * by the scanner. + */ +#include "names.c" +static const char *special_names[] = { + "byte", "dword", "far", "long", "near", "nosplit", "qword", + "short", "strict", "to", "tword", "word" +}; +static const char *prefix_names[] = { + "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne", + "repnz", "repz", "times" +}; + + +/* + * Standard scanner routine used by parser.c and some output + * formats. It keeps a succession of temporary-storage strings in + * stdscan_tempstorage, which can be cleared using stdscan_reset. 
+ */ +static char **stdscan_tempstorage = NULL; +static int stdscan_tempsize = 0, stdscan_templen = 0; +#define STDSCAN_TEMP_DELTA 256 + +static void stdscan_pop(void) +{ + nasm_free (stdscan_tempstorage[--stdscan_templen]); +} + +void stdscan_reset(void) +{ + while (stdscan_templen > 0) + stdscan_pop(); +} + +/* + * Unimportant cleanup is done to avoid confusing people who are trying + * to debug real memory leaks + */ +void nasmlib_cleanup (void) +{ + stdscan_reset(); + nasm_free (stdscan_tempstorage); +} + +static char *stdscan_copy(char *p, int len) +{ + char *text; + + text = nasm_malloc(len+1); + strncpy (text, p, len); + text[len] = '\0'; + + if (stdscan_templen >= stdscan_tempsize) { + stdscan_tempsize += STDSCAN_TEMP_DELTA; + stdscan_tempstorage = nasm_realloc(stdscan_tempstorage, + stdscan_tempsize*sizeof(char *)); + } + stdscan_tempstorage[stdscan_templen++] = text; + + return text; +} + +char *stdscan_bufptr = NULL; +int stdscan (void *private_data, struct tokenval *tv) +{ + char ourcopy[MAX_KEYWORD+1], *r, *s; + + (void) private_data; /* Don't warn that this parameter is unused */ + + while (isspace(*stdscan_bufptr)) stdscan_bufptr++; + if (!*stdscan_bufptr) + return tv->t_type = 0; + + /* we have a token; either an id, a number or a char */ + if (isidstart(*stdscan_bufptr) || + (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) { + /* now we've got an identifier */ + int i; + int is_sym = FALSE; + + if (*stdscan_bufptr == '$') { + is_sym = TRUE; + stdscan_bufptr++; + } + + r = stdscan_bufptr++; + while (isidchar(*stdscan_bufptr)) stdscan_bufptr++; + tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r); + + if (is_sym || stdscan_bufptr-r > MAX_KEYWORD) + return tv->t_type = TOKEN_ID;/* bypass all other checks */ + + for (s=tv->t_charptr, r=ourcopy; *s; s++) + *r++ = tolower (*s); + *r = '\0'; + /* right, so we have an identifier sitting in temp storage. now, + * is it actually a register or instruction name, or what? 
*/ + if ((tv->t_integer=bsi(ourcopy, reg_names, + elements(reg_names)))>=0) { + tv->t_integer += EXPR_REG_START; + return tv->t_type = TOKEN_REG; + } else if ((tv->t_integer=bsi(ourcopy, insn_names, + elements(insn_names)))>=0) { + return tv->t_type = TOKEN_INSN; + } + for (i=0; it_integer = ico[i]; + if ((tv->t_inttwo=bsi(p, conditions, + elements(conditions)))>=0) + return tv->t_type = TOKEN_INSN; + } + if ((tv->t_integer=bsi(ourcopy, prefix_names, + elements(prefix_names)))>=0) { + tv->t_integer += PREFIX_ENUM_START; + return tv->t_type = TOKEN_PREFIX; + } + if ((tv->t_integer=bsi(ourcopy, special_names, + elements(special_names)))>=0) + return tv->t_type = TOKEN_SPECIAL; + if (!nasm_stricmp(ourcopy, "seg")) + return tv->t_type = TOKEN_SEG; + if (!nasm_stricmp(ourcopy, "wrt")) + return tv->t_type = TOKEN_WRT; + return tv->t_type = TOKEN_ID; + } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) { + /* + * It's a $ sign with no following hex number; this must + * mean it's a Here token ($), evaluating to the current + * assembly location, or a Base token ($$), evaluating to + * the base of the current segment. 
+ */ + stdscan_bufptr++; + if (*stdscan_bufptr == '$') { + stdscan_bufptr++; + return tv->t_type = TOKEN_BASE; + } + return tv->t_type = TOKEN_HERE; + } else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */ + int rn_error; + + r = stdscan_bufptr++; + while (isnumchar(*stdscan_bufptr)) + stdscan_bufptr++; + + if (*stdscan_bufptr == '.') { + /* + * a floating point constant + */ + stdscan_bufptr++; + while (isnumchar(*stdscan_bufptr) || + ((stdscan_bufptr[-1] == 'e' || stdscan_bufptr[-1] == 'E') + && (*stdscan_bufptr == '-' || *stdscan_bufptr == '+')) ) + { + stdscan_bufptr++; + } + tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r); + return tv->t_type = TOKEN_FLOAT; + } + r = stdscan_copy(r, stdscan_bufptr - r); + tv->t_integer = readnum(r, &rn_error); + stdscan_pop(); + if (rn_error) + return tv->t_type = TOKEN_ERRNUM;/* some malformation occurred */ + tv->t_charptr = NULL; + return tv->t_type = TOKEN_NUM; + } else if (*stdscan_bufptr == '\'' || + *stdscan_bufptr == '"') {/* a char constant */ + char quote = *stdscan_bufptr++, *r; + int rn_warn; + r = tv->t_charptr = stdscan_bufptr; + while (*stdscan_bufptr && *stdscan_bufptr != quote) stdscan_bufptr++; + tv->t_inttwo = stdscan_bufptr - r; /* store full version */ + if (!*stdscan_bufptr) + return tv->t_type = TOKEN_ERRNUM; /* unmatched quotes */ + stdscan_bufptr++; /* skip over final quote */ + tv->t_integer = readstrnum(r, tv->t_inttwo, &rn_warn); + /* FIXME: rn_warn is not checked! 
*/ + return tv->t_type = TOKEN_NUM; + } else if (*stdscan_bufptr == ';') { /* a comment has happened - stay */ + return tv->t_type = 0; + } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '>') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_SHR; + } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '<') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_SHL; + } else if (stdscan_bufptr[0] == '/' && stdscan_bufptr[1] == '/') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_SDIV; + } else if (stdscan_bufptr[0] == '%' && stdscan_bufptr[1] == '%') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_SMOD; + } else if (stdscan_bufptr[0] == '=' && stdscan_bufptr[1] == '=') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_EQ; + } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '>') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_NE; + } else if (stdscan_bufptr[0] == '!' && stdscan_bufptr[1] == '=') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_NE; + } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '=') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_LE; + } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '=') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_GE; + } else if (stdscan_bufptr[0] == '&' && stdscan_bufptr[1] == '&') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_DBL_AND; + } else if (stdscan_bufptr[0] == '^' && stdscan_bufptr[1] == '^') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_DBL_XOR; + } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') { + stdscan_bufptr += 2; + return tv->t_type = TOKEN_DBL_OR; + } else /* just an ordinary char */ + return tv->t_type = (unsigned char) (*stdscan_bufptr++); +} + +/* + * Return TRUE if the argument is a simple scalar. (Or a far- + * absolute, which counts.) 
+ */ +int is_simple (expr *vect) +{ + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type != EXPR_SIMPLE) + return 0; + do { + vect++; + } while (vect->type && !vect->value); + if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0; + return 1; +} + +/* + * Return TRUE if the argument is a simple scalar, _NOT_ a far- + * absolute. + */ +int is_really_simple (expr *vect) +{ + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type != EXPR_SIMPLE) + return 0; + do { + vect++; + } while (vect->type && !vect->value); + if (vect->type) return 0; + return 1; +} + +/* + * Return TRUE if the argument is relocatable (i.e. a simple + * scalar, plus at most one segment-base, plus possibly a WRT). + */ +int is_reloc (expr *vect) +{ + while (vect->type && !vect->value) /* skip initial value-0 terms */ + vect++; + if (!vect->type) /* trivially return TRUE if nothing */ + return 1; /* is present apart from value-0s */ + if (vect->type < EXPR_SIMPLE) /* FALSE if a register is present */ + return 0; + if (vect->type == EXPR_SIMPLE) { /* skip over a pure number term... */ + do { + vect++; + } while (vect->type && !vect->value); + if (!vect->type) /* ...returning TRUE if that's all */ + return 1; + } + if (vect->type == EXPR_WRT) { /* skip over a WRT term... */ + do { + vect++; + } while (vect->type && !vect->value); + if (!vect->type) /* ...returning TRUE if that's all */ + return 1; + } + if (vect->value != 0 && vect->value != 1) + return 0; /* segment base multiplier non-unity */ + do { /* skip over _one_ seg-base term... */ + vect++; + } while (vect->type && !vect->value); + if (!vect->type) /* ...returning TRUE if that's all */ + return 1; + return 0; /* And return FALSE if there's more */ +} + +/* + * Return TRUE if the argument contains an `unknown' part. 
+ */ +int is_unknown(expr *vect) +{ + while (vect->type && vect->type < EXPR_UNKNOWN) + vect++; + return (vect->type == EXPR_UNKNOWN); +} + +/* + * Return TRUE if the argument contains nothing but an `unknown' + * part. + */ +int is_just_unknown(expr *vect) +{ + while (vect->type && !vect->value) + vect++; + return (vect->type == EXPR_UNKNOWN); +} + +/* + * Return the scalar part of a relocatable vector. (Including + * simple scalar vectors - those qualify as relocatable.) + */ +long reloc_value (expr *vect) +{ + while (vect->type && !vect->value) + vect++; + if (!vect->type) return 0; + if (vect->type == EXPR_SIMPLE) + return vect->value; + else + return 0; +} + +/* + * Return the segment number of a relocatable vector, or NO_SEG for + * simple scalars. + */ +long reloc_seg (expr *vect) +{ + while (vect->type && (vect->type == EXPR_WRT || !vect->value)) + vect++; + if (vect->type == EXPR_SIMPLE) { + do { + vect++; + } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); + } + if (!vect->type) + return NO_SEG; + else + return vect->type - EXPR_SEGBASE; +} + +/* + * Return the WRT segment number of a relocatable vector, or NO_SEG + * if no WRT part is present. + */ +long reloc_wrt (expr *vect) +{ + while (vect->type && vect->type < EXPR_WRT) + vect++; + if (vect->type == EXPR_WRT) { + return vect->value; + } else + return NO_SEG; +} + +/* + * Binary search. 
+ */ +int bsi (char *string, const char **array, int size) +{ + int i = -1, j = size; /* always, i < index < j */ + while (j-i >= 2) { + int k = (i+j)/2; + int l = strcmp(string, array[k]); + if (l<0) /* it's in the first half */ + j = k; + else if (l>0) /* it's in the second half */ + i = k; + else /* we've got it :) */ + return k; + } + return -1; /* we haven't got it :( */ +} + +static char *file_name = NULL; +static long line_number = 0; + +char *src_set_fname(char *newname) +{ + char *oldname = file_name; + file_name = newname; + return oldname; +} + +long src_set_linnum(long newline) +{ + long oldline = line_number; + line_number = newline; + return oldline; +} + +long src_get_linnum(void) +{ + return line_number; +} + +int src_get(long *xline, char **xname) +{ + if (!file_name || !*xname || strcmp(*xname, file_name)) + { + nasm_free(*xname); + *xname = file_name ? nasm_strdup(file_name) : NULL; + *xline = line_number; + return -2; + } + if (*xline != line_number) + { + long tmp = line_number - *xline; + *xline = line_number; + return tmp; + } + return 0; +} + +void nasm_quote(char **str) +{ + int ln=strlen(*str); + char q=(*str)[0]; + char *p; + if (ln>1 && (*str)[ln-1]==q && (q=='"' || q=='\'')) + return; + q = '"'; + if (strchr(*str,q)) + q = '\''; + p = nasm_malloc(ln+3); + strcpy(p+1, *str); + nasm_free(*str); + p[ln+1] = p[0] = q; + p[ln+2] = 0; + *str = p; +} + +char *nasm_strcat(char *one, char *two) +{ + char *rslt; + int l1=strlen(one); + rslt = nasm_malloc(l1+strlen(two)+1); + strcpy(rslt, one); + strcpy(rslt+l1, two); + return rslt; +} + +void null_debug_init(struct ofmt *of, void *id, FILE *fp, efunc error ) {} +void null_debug_linenum(const char *filename, long linenumber, long segto) {} +void null_debug_deflabel(char *name, long segment, long offset, int is_global, char *special) {} +void null_debug_routine(const char *directive, const char *params) {} +void null_debug_typevalue(long type) {} +void null_debug_output(int type, void *param) {} 
+void null_debug_cleanup(void){} + +struct dfmt null_debug_form = { + "Null debug format", + "null", + null_debug_init, + null_debug_linenum, + null_debug_deflabel, + null_debug_routine, + null_debug_typevalue, + null_debug_output, + null_debug_cleanup +}; + +struct dfmt *null_debug_arr[2] = { &null_debug_form, NULL }; diff --git a/src/preprocs/nasm/nasmlib.h b/src/preprocs/nasm/nasmlib.h new file mode 100644 index 00000000..54964539 --- /dev/null +++ b/src/preprocs/nasm/nasmlib.h @@ -0,0 +1,258 @@ +/* nasmlib.h header file for nasmlib.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef NASM_NASMLIB_H +#define NASM_NASMLIB_H + +/* + * If this is defined, the wrappers around malloc et al will + * transform into logging variants, which will cause NASM to create + * a file called `malloc.log' when run, and spew details of all its + * memory management into that. That can then be analysed to detect + * memory leaks and potentially other problems too. + */ +/* #define LOGALLOC */ + +/* + * Wrappers around malloc, realloc and free. nasm_malloc will + * fatal-error and die rather than return NULL; nasm_realloc will + * do likewise, and will also guarantee to work right on being + * passed a NULL pointer; nasm_free will do nothing if it is passed + * a NULL pointer. 
+ */ +#ifdef NASM_NASM_H /* need efunc defined for this */ +void nasm_set_malloc_error (efunc); +#ifndef LOGALLOC +void *nasm_malloc (size_t); +void *nasm_realloc (void *, size_t); +void nasm_free (void *); +char *nasm_strdup (const char *); +char *nasm_strndup (char *, size_t); +#else +void *nasm_malloc_log (char *, int, size_t); +void *nasm_realloc_log (char *, int, void *, size_t); +void nasm_free_log (char *, int, void *); +char *nasm_strdup_log (char *, int, const char *); +char *nasm_strndup_log (char *, int, char *, size_t); +#define nasm_malloc(x) nasm_malloc_log(__FILE__,__LINE__,x) +#define nasm_realloc(x,y) nasm_realloc_log(__FILE__,__LINE__,x,y) +#define nasm_free(x) nasm_free_log(__FILE__,__LINE__,x) +#define nasm_strdup(x) nasm_strdup_log(__FILE__,__LINE__,x) +#define nasm_strndup(x,y) nasm_strndup_log(__FILE__,__LINE__,x,y) +#endif +#endif + +/* + * ANSI doesn't guarantee the presence of `stricmp' or + * `strcasecmp'. + */ +#if defined(stricmp) || defined(strcasecmp) +#if defined(stricmp) +#define nasm_stricmp stricmp +#else +#define nasm_stricmp strcasecmp +#endif +#else +int nasm_stricmp (const char *, const char *); +#endif + +#if defined(strnicmp) || defined(strncasecmp) +#if defined(strnicmp) +#define nasm_strnicmp strnicmp +#else +#define nasm_strnicmp strncasecmp +#endif +#else +int nasm_strnicmp (const char *, const char *, int); +#endif + +/* + * Convert a string into a number, using NASM number rules. Sets + * `*error' to TRUE if an error occurs, and FALSE otherwise. + */ +long readnum(char *str, int *error); + +/* + * Convert a character constant into a number. Sets + * `*warn' to TRUE if an overflow occurs, and FALSE otherwise. + * str points to and length covers the middle of the string, + * without the quotes. + */ +long readstrnum(char *str, int length, int *warn); + +/* + * seg_init: Initialise the segment-number allocator. + * seg_alloc: allocate a hitherto unused segment number. 
+ */ +void seg_init(void); +long seg_alloc(void); + +/* + * many output formats will be able to make use of this: a standard + * function to add an extension to the name of the input file + */ +#ifdef NASM_NASM_H +void standard_extension (char *inname, char *outname, char *extension, + efunc error); +#endif + +/* + * some handy macros that will probably be of use in more than one + * output format: convert integers into little-endian byte packed + * format in memory + */ + +#define WRITELONG(p,v) \ + do { \ + *(p)++ = (v) & 0xFF; \ + *(p)++ = ((v) >> 8) & 0xFF; \ + *(p)++ = ((v) >> 16) & 0xFF; \ + *(p)++ = ((v) >> 24) & 0xFF; \ + } while (0) + +#define WRITESHORT(p,v) \ + do { \ + *(p)++ = (v) & 0xFF; \ + *(p)++ = ((v) >> 8) & 0xFF; \ + } while (0) + +/* + * and routines to do the same thing to a file + */ +void fwriteshort (int data, FILE *fp); +void fwritelong (long data, FILE *fp); + +/* + * Routines to manage a dynamic random access array of longs which + * may grow in size to be more than the largest single malloc'able + * chunk. + */ + +#define RAA_BLKSIZE 4096 /* this many longs allocated at once */ +#define RAA_LAYERSIZE 1024 /* this many _pointers_ allocated */ + +typedef struct RAA RAA; +typedef union RAA_UNION RAA_UNION; +typedef struct RAA_LEAF RAA_LEAF; +typedef struct RAA_BRANCH RAA_BRANCH; + +struct RAA { + /* + * Number of layers below this one to get to the real data. 0 + * means this structure is a leaf, holding RAA_BLKSIZE real + * data items; 1 and above mean it's a branch, holding + * RAA_LAYERSIZE pointers to the next level branch or leaf + * structures. + */ + int layers; + /* + * Number of real data items spanned by one position in the + * `data' array at this level. This number is 1, trivially, for + * a leaf (level 0): for a level 1 branch it should be + * RAA_BLKSIZE, and for a level 2 branch it's + * RAA_LAYERSIZE*RAA_BLKSIZE. 
+ */ + long stepsize; + union RAA_UNION { + struct RAA_LEAF { + long data[RAA_BLKSIZE]; + } l; + struct RAA_BRANCH { + struct RAA *data[RAA_LAYERSIZE]; + } b; + } u; +}; + + +struct RAA *raa_init (void); +void raa_free (struct RAA *); +long raa_read (struct RAA *, long); +struct RAA *raa_write (struct RAA *r, long posn, long value); + +/* + * Routines to manage a dynamic sequential-access array, under the + * same restriction on maximum mallocable block. This array may be + * written to in two ways: a contiguous chunk can be reserved of a + * given size, and a pointer returned, or single-byte data may be + * written. The array can also be read back in the same two ways: + * as a series of big byte-data blocks or as a list of structures + * of a given size. + */ + +struct SAA { + /* + * members `end' and `elem_len' are only valid in first link in + * list; `rptr' and `rpos' are used for reading + */ + struct SAA *next, *end, *rptr; + long elem_len, length, posn, start, rpos; + char *data; +}; + +struct SAA *saa_init (long elem_len); /* 1 == byte */ +void saa_free (struct SAA *); +void *saa_wstruct (struct SAA *); /* return a structure of elem_len */ +void saa_wbytes (struct SAA *, const void *, long); /* write arbitrary bytes */ +void saa_rewind (struct SAA *); /* for reading from beginning */ +void *saa_rstruct (struct SAA *); /* return NULL on EOA */ +void *saa_rbytes (struct SAA *, long *); /* return 0 on EOA */ +void saa_rnbytes (struct SAA *, void *, long); /* read a given no. of bytes */ +void saa_fread (struct SAA *s, long posn, void *p, long len); /* fixup */ +void saa_fwrite (struct SAA *s, long posn, void *p, long len); /* fixup */ +void saa_fpwrite (struct SAA *, FILE *); + +#ifdef NASM_NASM_H +/* + * Standard scanner. + */ +extern char *stdscan_bufptr; +void stdscan_reset(void); +int stdscan (void *private_data, struct tokenval *tv); +#endif + +#ifdef NASM_NASM_H +/* + * Library routines to manipulate expression data types. 
+ */ +int is_reloc(expr *); +int is_simple(expr *); +int is_really_simple (expr *); +int is_unknown(expr *); +int is_just_unknown(expr *); +long reloc_value(expr *); +long reloc_seg(expr *); +long reloc_wrt(expr *); +#endif + +/* + * Binary search routine. Returns index into `array' of an entry + * matching `string', or <0 if no match. `array' is taken to + * contain `size' elements. + */ +int bsi (char *string, const char **array, int size); + + +char *src_set_fname(char *newname); +long src_set_linnum(long newline); +long src_get_linnum(void); +/* + * src_get may be used if you simply want to know the source file and line. + * It is also used if you maintain private status about the source location. + * It returns 0 if the information was the same as the last time you + * checked, -2 if the name changed and (new-old) if just the line changed. + */ +int src_get(long *xline, char **xname); + +void nasm_quote(char **str); +char *nasm_strcat(char *one, char *two); +void nasmlib_cleanup(void); + +void null_debug_routine(const char *directive, const char *params); +extern struct dfmt null_debug_form; +extern struct dfmt *null_debug_arr[2]; +#endif diff --git a/src/preprocs/nasm/standard.mac b/src/preprocs/nasm/standard.mac new file mode 100644 index 00000000..bbbf90d8 --- /dev/null +++ b/src/preprocs/nasm/standard.mac @@ -0,0 +1,110 @@ +; Standard macro set for NASM -*- nasm -*- + +; Macros to make NASM ignore some TASM directives before the first include +; directive. + + %idefine IDEAL + %idefine JUMPS + %idefine P386 + %idefine P486 + %idefine P586 + %idefine END + +; This is a magic token which indicates the end of the TASM macros +*END*TASM*MACROS* + +; Note that although some user-level forms of directives are defined +; here, not all of them are: the user-level form of a format-specific +; directive should be defined in the module for that directive. 
+ +; These two need to be defined, though the actual definitions will +; be constantly updated during preprocessing. +%define __FILE__ +%define __LINE__ + +%define __SECT__ ; it ought to be defined, even if as nothing + +%imacro section 1+.nolist +%define __SECT__ [section %1] + __SECT__ +%endmacro +%imacro segment 1+.nolist +%define __SECT__ [segment %1] + __SECT__ +%endmacro + +%imacro absolute 1+.nolist +%define __SECT__ [absolute %1] + __SECT__ +%endmacro + +%imacro struc 1.nolist +%push struc +%define %$strucname %1 +[absolute 0] +%$strucname: ; allow definition of `.member' to work sanely +%endmacro +%imacro endstruc 0.nolist +%{$strucname}_size: +%pop +__SECT__ +%endmacro + +%imacro istruc 1.nolist +%push istruc +%define %$strucname %1 +%$strucstart: +%endmacro +%imacro at 1-2+.nolist + times %1-($-%$strucstart) db 0 + %2 +%endmacro +%imacro iend 0.nolist + times %{$strucname}_size-($-%$strucstart) db 0 +%pop +%endmacro + +%imacro align 1-2+.nolist nop + times ($$-$) & ((%1)-1) %2 +%endmacro +%imacro alignb 1-2+.nolist resb 1 + times ($$-$) & ((%1)-1) %2 +%endmacro + +%imacro extern 1-*.nolist +%rep %0 +[extern %1] +%rotate 1 +%endrep +%endmacro + +%imacro bits 1+.nolist +[bits %1] +%endmacro + +%imacro use16 0.nolist +[bits 16] +%endmacro +%imacro use32 0.nolist +[bits 32] +%endmacro + +%imacro global 1-*.nolist +%rep %0 +[global %1] +%rotate 1 +%endrep +%endmacro + +%imacro common 1-*.nolist +%rep %0 +[common %1] +%rotate 1 +%endrep +%endmacro + +%imacro cpu 1+.nolist +[cpu %1] +%endmacro + + -- 2.40.0