From a2dc8a2fa5c40dbfca74d6ce72da2929277dba9a Mon Sep 17 00:00:00 2001 From: Sascha Schumann Date: Wed, 20 Sep 2000 01:15:10 +0000 Subject: [PATCH] 2nd Rewrite of the main scanner engine. This version is cleaner and faster. It handles the following tags currently: <a href>, <area href>, <frame src>, <img src>, <input src> and <form>. Additional ones can be added within seconds. The support for HTML forms has been significantly improved. The scanner will now add proper Hidden Fields for the Session ID. --- ext/standard/url_scanner_ex.c | 834 +++++++++++++++------------------ ext/standard/url_scanner_ex.h | 23 +- ext/standard/url_scanner_ex.re | 357 +++++++------- 3 files changed, 554 insertions(+), 660 deletions(-) diff --git a/ext/standard/url_scanner_ex.c b/ext/standard/url_scanner_ex.c index 7f3f0fbf7b..84021b07f4 100644 --- a/ext/standard/url_scanner_ex.c +++ b/ext/standard/url_scanner_ex.c @@ -1,4 +1,4 @@ -/* Generated by re2c 0.5 on Tue Sep 19 22:11:37 2000 */ +/* Generated by re2c 0.5 on Wed Sep 20 03:07:32 2000 */ #line 1 "/home/sas/src/php4/ext/standard/url_scanner_ex.re" /* +----------------------------------------------------------------------+ @@ -37,8 +37,6 @@ #define url_adapt_ext url_adapt_ext_ex #define url_scanner url_scanner_ex -#define url_adapt_state url_adapt_state_ex -#define url_adapt_state_t url_adapt_state_ex_t static inline void smart_str_append(smart_str *dest, smart_str *src) { @@ -90,15 +88,14 @@ static inline void smart_str_setl(smart_str *dest, const char *src, size_t len) dest->c = (char *) src; } -static inline void smart_str_appends(smart_str *dest, const char *src) -{ - smart_str_appendl(dest, src, strlen(src)); -} +#define smart_str_appends(dest, src) smart_str_appendl(dest, src, sizeof(src)-1) +#if 0 static inline void smart_str_copys(smart_str *dest, const char *src) { smart_str_copyl(dest, src, strlen(src)); } +#endif static inline void smart_str_sets(smart_str *dest, const char *src) { @@ -107,9 +104,9 @@ static inline void smart_str_sets(smart_str *dest, const char *src) static inline void 
attach_url(smart_str *url, smart_str *name, smart_str *val, const char *separator) { - if (strchr(url->c, ':')) return; + if (memchr(url->c, ':', url->len)) return; - if (strchr(url->c, '?')) + if (memchr(url->c, '?', url->len)) smart_str_appendl(url, separator, 1); else smart_str_appendl(url, "?", 1); @@ -131,12 +128,14 @@ struct php_tag_arg { static struct php_tag_arg check_tag_arg[] = { TAG_ARG_ENTRY(a, href) TAG_ARG_ENTRY(area, href) - TAG_ARG_ENTRY(frame, source) + TAG_ARG_ENTRY(frame, src) TAG_ARG_ENTRY(img, src) + TAG_ARG_ENTRY(input, src) + TAG_ARG_ENTRY(form, fake_entry_for_passing_on_form_tag) {0} }; -static inline void tag_arg(url_adapt_state_t *ctx PLS_DC) +static inline void tag_arg(url_adapt_state_ex_t *ctx PLS_DC) { char f = 0; int i; @@ -153,134 +152,137 @@ static inline void tag_arg(url_adapt_state_t *ctx PLS_DC) smart_str_appends(&ctx->result, "\""); if (f) { - attach_url(&ctx->para, &ctx->name, &ctx->value, PG(arg_separator)); + attach_url(&ctx->val, &ctx->q_name, &ctx->q_value, PG(arg_separator)); } - smart_str_append(&ctx->result, &ctx->para); + smart_str_append(&ctx->result, &ctx->val); smart_str_appends(&ctx->result, "\""); } -#line 162 - - -#define NEXT continue - -#define COPY_ALL \ - smart_str_appendl(&ctx->result, start, YYCURSOR - start); \ - start = NULL; \ - NEXT - -#define YYFILL(n) goto finish -#define YYCTYPE unsigned char -#define YYLIMIT endptr -#define YYCURSOR cursor -#define YYMARKER marker - -#define HANDLE_FORM \ - if (ctx->tag.len == 4 && strncasecmp(ctx->tag.c, "form", ctx->tag.len) == 0) { \ - smart_str_appends(&ctx->result, "><INPUT TYPE=HIDDEN NAME=\""); \ - smart_str_append(&ctx->result, &ctx->name); \ - smart_str_appends(&ctx->result, "\" VALUE=\""); \ - smart_str_append(&ctx->result, &ctx->value); \ - smart_str_appends(&ctx->result, "\""); \ - } - -#define GO(n) ctx->state = n - enum { STATE_PLAIN, STATE_TAG, STATE_NEXT_ARG, STATE_ARG, - STATE_PARA + STATE_BEFORE_VAL, + STATE_VAL }; -static void mainloop(url_adapt_state_t *ctx, smart_str *newstuff) +#define YYFILL(n) goto 
stop +#define YYCTYPE char +#define YYCURSOR xp +#define YYLIMIT end +#define YYMARKER q +#define STATE ctx->state + +#define PASSTHRU() {\ + smart_str_appendl(&ctx->result, start, YYCURSOR - start); \ +} + +#define HANDLE_FORM() {\ + if (ctx->tag.len == 4 && strncasecmp(ctx->tag.c, "form", 4) == 0) {\ + smart_str_appends(&ctx->result, "<INPUT TYPE=HIDDEN NAME=\""); \ + smart_str_append(&ctx->result, &ctx->q_name); \ + smart_str_appends(&ctx->result, "\" VALUE=\""); \ + smart_str_append(&ctx->result, &ctx->q_value); \ + smart_str_appends(&ctx->result, "\">"); \ + } \ +} + +/* + * HANDLE_TAG copies the HTML Tag and checks whether we + * have that tag in our table. If we might modify it, + * we continue to scan the tag, otherwise we simply copy the complete + * HTML stuff to the result buffer. + */ + +#define HANDLE_TAG() {\ + int __ok = 0; \ + int i; \ + smart_str_setl(&ctx->tag, start, YYCURSOR - start); \ + for (i = 0; check_tag_arg[i].tag; i++) { \ + if (ctx->tag.len == check_tag_arg[i].taglen \ + && strncasecmp(ctx->tag.c, check_tag_arg[i].tag, ctx->tag.len) == 0) { \ + __ok = 1; \ + break; \ + } \ + } \ + STATE = __ok ? STATE_NEXT_ARG : STATE_PLAIN; \ +} + +#define HANDLE_ARG() {\ + smart_str_setl(&ctx->arg, start, YYCURSOR - start); \ +} +#define HANDLE_VAL(quotes) {\ + smart_str_copyl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2); \ + tag_arg(ctx PLS_CC); \ +} + +/* + * Since arg/tag are read-only during the mainloop, we do not need + * to copy them. We need those variables across multiple calls + * to url_adapt() though, but they point to a private buffer. So we + * copy them before leaving the mainloop() and restore them at + * the beginning. 
+ */ + +#define MOVE_TO_CTX(X) \ + if (ctx->X.c) \ + smart_str_copyl(&ctx->c_##X, ctx->X.c, ctx->X.len); \ + else \ + smart_str_free(&ctx->c_##X) + +#define FETCH_FROM_CTX(X) \ + smart_str_setl(&ctx->X, ctx->c_##X.c, ctx->c_##X.len) + +static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen) { - char *para_start, *arg_start, *tag_start; - char *start = NULL; - char *cursor; - char *marker; - char *endptr; + char *end, *q; + char *xp; + char *start; + int rest; PLS_FETCH(); - arg_start = para_start = tag_start = NULL; - smart_str_append(&ctx->work, newstuff); - smart_str_free(&ctx->result); + FETCH_FROM_CTX(arg); + FETCH_FROM_CTX(tag); + + smart_str_appendl(&ctx->buf, newdata, newlen); + + YYCURSOR = ctx->buf.c; + YYLIMIT = ctx->buf.c + ctx->buf.len; - smart_str_setl(&ctx->arg, ctx->c_arg.c, ctx->c_arg.len); - smart_str_setl(&ctx->tag, ctx->c_tag.c, ctx->c_tag.len); +#line 254 - cursor = ctx->work.c; - endptr = ctx->work.c + ctx->work.len; - while (YYCURSOR < YYLIMIT) { + while(1) { start = YYCURSOR; - #ifdef SCANNER_DEBUG - printf("state %d:%s'\n", ctx->state, YYCURSOR); + printf("state %d at %s\n", STATE, YYCURSOR); #endif - switch (ctx->state) { - + switch(STATE) { + case STATE_PLAIN: { YYCTYPE yych; unsigned int yyaccept; - static unsigned char yybm[] = { - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 0, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 
128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - }; goto yy0; yy1: ++YYCURSOR; yy0: if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; - if(yybm[0+yych] & 128) goto yy4; + if(yych != '<') goto yy4; yy2: yych = *++YYCURSOR; yy3: -#line 225 - { tag_start = YYCURSOR; GO(STATE_TAG); COPY_ALL;} -yy4: ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy5: if(yybm[0+yych] & 128) goto yy4; -yy6: -#line 226 - { COPY_ALL; } +#line 265 + { PASSTHRU(); STATE = STATE_TAG; continue; } +yy4: yych = *++YYCURSOR; +yy5: +#line 266 + { PASSTHRU(); continue; } } -#line 227 +#line 267 - break; - + break; + case STATE_TAG: { YYCTYPE yych; @@ -319,59 +321,84 @@ yy6: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - goto yy7; -yy8: ++YYCURSOR; -yy7: + goto yy6; +yy7: ++YYCURSOR; +yy6: if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych <= '@') goto yy11; - if(yych <= 'Z') goto yy9; - if(yych <= '`') goto yy11; - if(yych >= '{') goto yy11; -yy9: yyaccept = 0; - yych = *(YYMARKER = ++YYCURSOR); - if(yybm[0+yych] & 128) goto yy14; - if(yych == ' ') goto yy12; - if(yych == '>') goto yy12; -yy10: -#line 242 - { - YYCURSOR--; - GO(STATE_PLAIN); - tag_start = NULL; - NEXT; - } -yy11: yych = *++YYCURSOR; - goto yy10; -yy12: yych = *++YYCURSOR; -yy13: -#line 232 - { - YYCURSOR--; - arg_start = YYCURSOR; - smart_str_setl(&ctx->tag, start, YYCURSOR - start); 
-#ifdef SCANNER_DEBUG - printf("TAG(%s)\n", ctx->tag.c); -#endif - GO(STATE_NEXT_ARG); - COPY_ALL; - } -yy14: ++YYCURSOR; + if(yych <= '@') goto yy10; + if(yych <= 'Z') goto yy8; + if(yych <= '`') goto yy10; + if(yych >= '{') goto yy10; +yy8: yych = *++YYCURSOR; + goto yy13; +yy9: +#line 272 + { HANDLE_TAG() /* Sets STATE */; PASSTHRU(); continue; } +yy10: yych = *++YYCURSOR; +yy11: +#line 273 + { PASSTHRU(); continue; } +yy12: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy15: if(yybm[0+yych] & 128) goto yy14; - if(yych == ' ') goto yy12; - if(yych == '>') goto yy12; -yy16: YYCURSOR = YYMARKER; - switch(yyaccept){ - case 0: goto yy10; +yy13: if(yybm[0+yych] & 128) goto yy12; + goto yy9; +} +#line 274 + + break; + + case STATE_NEXT_ARG: +{ + YYCTYPE yych; + unsigned int yyaccept; + goto yy14; +yy15: ++YYCURSOR; +yy14: + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych <= '='){ + if(yych <= '\n'){ + if(yych <= '\t') goto yy22; + goto yy18; + } else { + if(yych == ' ') goto yy18; + goto yy22; + } + } else { + if(yych <= 'Z'){ + if(yych <= '>') goto yy16; + if(yych <= '@') goto yy22; + goto yy20; + } else { + if(yych <= '`') goto yy22; + if(yych <= 'z') goto yy20; + goto yy22; + } } +yy16: yych = *++YYCURSOR; +yy17: +#line 279 + { PASSTHRU(); HANDLE_FORM(); STATE = STATE_PLAIN; continue; } +yy18: yych = *++YYCURSOR; +yy19: +#line 280 + { PASSTHRU(); continue; } +yy20: yych = *++YYCURSOR; +yy21: +#line 281 + { YYCURSOR--; STATE = STATE_ARG; continue; } +yy22: yych = *++YYCURSOR; +yy23: +#line 282 + { PASSTHRU(); continue; } } -#line 248 +#line 283 - break; + break; - case STATE_NEXT_ARG: + case STATE_ARG: { YYCTYPE yych; unsigned int yyaccept; @@ -380,18 +407,18 @@ yy16: YYCURSOR = YYMARKER; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 128, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 
0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -409,373 +436,268 @@ yy16: YYCURSOR = YYMARKER; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - goto yy17; -yy18: ++YYCURSOR; -yy17: - if(YYLIMIT == YYCURSOR) YYFILL(1); + goto yy24; +yy25: ++YYCURSOR; +yy24: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yybm[0+yych] & 128) goto yy20; - if(yych == '>') goto yy23; -yy19:yy20: ++YYCURSOR; + if(yych <= '@') goto yy28; + if(yych <= 'Z') goto yy26; + if(yych <= '`') goto yy28; + if(yych >= '{') goto yy28; +yy26: yych = *++YYCURSOR; + goto yy31; +yy27: +#line 288 + { PASSTHRU(); HANDLE_ARG(); STATE = STATE_BEFORE_VAL; continue; } +yy28: yych = *++YYCURSOR; +yy29: +#line 289 + { PASSTHRU(); STATE = STATE_NEXT_ARG; continue; } +yy30: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy21: if(yybm[0+yych] & 128) goto yy20; -yy22: -#line 253 - { - GO(STATE_ARG); - NEXT; - } -yy23: yych = *++YYCURSOR; -yy24: -#line 257 - { - HANDLE_FORM; - GO(STATE_PLAIN); - tag_start = NULL; - COPY_ALL; - } +yy31: if(yybm[0+yych] & 128) goto yy30; + goto yy27; } -#line 263 +#line 290 - break; - case STATE_ARG: - smart_str_appendl(&ctx->result, " ", 1); + case STATE_BEFORE_VAL: { YYCTYPE yych; unsigned int yyaccept; static unsigned char yybm[] = { - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 128, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 
64, 64, 64, 0, 0, 64, - 64, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 64, 64, 64, 64, 64, - 64, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, }; - goto yy25; -yy26: ++YYCURSOR; -yy25: - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yybm[0+yych] & 32) goto yy28; - if(yych == ' ') goto yy27; - if(yych <= '<') goto yy30; - if(yych >= '?') goto yy30; -yy27: -#line 281 - { - arg_start = YYCURSOR; - ctx->state--; - COPY_ALL; - } -yy28: yyaccept = 0; - 
YYMARKER = ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); + goto yy32; +yy33: ++YYCURSOR; +yy32: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; -yy29: if(yybm[0+yych] & 128) goto yy32; - if(yych <= '@'){ - if(yych <= '<') goto yy30; - if(yych <= '=') goto yy35; - if(yych <= '>') goto yy27; - } else { - if(yych <= 'Z') goto yy28; - if(yych <= '`') goto yy30; - if(yych <= 'z') goto yy28; - } -yy30: ++YYCURSOR; + if(yych == ' ') goto yy34; + if(yych == '=') goto yy36; + goto yy38; +yy34: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == ' ') goto yy41; + if(yych == '=') goto yy39; +yy35: +#line 295 + { YYCURSOR--; STATE = STATE_NEXT_ARG; continue; } +yy36: yych = *++YYCURSOR; + goto yy40; +yy37: +#line 294 + { PASSTHRU(); STATE = STATE_VAL; continue; } +yy38: yych = *++YYCURSOR; + goto yy35; +yy39: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy31: if(yybm[0+yych] & 64) goto yy30; - goto yy27; -yy32: ++YYCURSOR; +yy40: if(yybm[0+yych] & 128) goto yy39; + goto yy37; +yy41: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy33: if(yybm[0+yych] & 128) goto yy32; - if(yych == '=') goto yy35; -yy34: YYCURSOR = YYMARKER; +yy42: if(yych == ' ') goto yy41; + if(yych == '=') goto yy39; +yy43: YYCURSOR = YYMARKER; switch(yyaccept){ - case 0: goto yy27; + case 0: goto yy35; } -yy35: ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy36: if(yych == ' ') goto yy35; -yy37: -#line 269 - { - char *p; - - for (p = start; isalpha(*p); p++); - smart_str_setl(&ctx->arg, start, p - start); -#ifdef SCANNER_DEBUG - printf("ARG(%s)\n", ctx->arg.c); -#endif - para_start = YYCURSOR; - ctx->state++; - COPY_ALL; - } } -#line 286 +#line 296 break; - case STATE_PARA: + case STATE_VAL: { YYCTYPE yych; unsigned int yyaccept; static unsigned char yybm[] = { - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 
224, 224, - 128, 224, 32, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 0, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 0, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 128, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 128, 192, 0, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 0, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 
192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, }; - goto yy38; -yy39: ++YYCURSOR; -yy38: - if((YYLIMIT - YYCURSOR) < 4) YYFILL(4); + goto yy44; +yy45: ++YYCURSOR; +yy44: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych <= '"'){ - if(yych == ' ') goto yy43; - if(yych <= '!') goto yy42; + if(yych <= ' '){ + if(yych == '\n') goto yy50; + if(yych <= '\037') goto yy48; + goto yy50; } else { - if(yych <= '>'){ - if(yych <= '=') goto yy42; - goto yy43; + if(yych <= '"'){ + if(yych <= '!') goto yy48; } else { - if(yych == '^') goto yy43; - goto yy42; + if(yych == '>') goto yy50; + goto yy48; } } -yy40: yyaccept = 0; - yych = *(YYMARKER = ++YYCURSOR); - if(yych != '^') goto yy51; -yy41: -#line 315 - { - YYCURSOR--; - ctx->state = 2; - NEXT; - } -yy42: yyaccept = 0; +yy46: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); - if(yych == '^') goto yy41; - goto yy47; -yy43: yych = *++YYCURSOR; - goto yy41; -yy44: yych = *++YYCURSOR; -yy45: + if(yybm[0+yych] & 128) goto yy53; +yy47: #line 303 - { - YYCURSOR--; - para_start = NULL; - smart_str_copyl(&ctx->para, start, YYCURSOR - start); -#ifdef SCANNER_DEBUG - printf("PARA(%s)\n", ctx->para.c); -#endif - tag_arg(ctx PLS_CC); - arg_start = YYCURSOR; - GO(STATE_NEXT_ARG); - NEXT; - } -yy46: ++YYCURSOR; + { PASSTHRU(); STATE = STATE_NEXT_ARG; continue; } +yy48: yych = *++YYCURSOR; + goto yy52; +yy49: +#line 302 + { HANDLE_VAL(0); STATE = STATE_NEXT_ARG; continue; } +yy50: yych = *++YYCURSOR; + goto yy47; +yy51: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) 
YYFILL(1); + yych = *YYCURSOR; +yy52: if(yybm[0+yych] & 64) goto yy51; + goto yy49; +yy53: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy47: if(yybm[0+yych] & 32) goto yy46; - if(yych <= ']') goto yy44; -yy48: YYCURSOR = YYMARKER; +yy54: if(yybm[0+yych] & 128) goto yy53; + if(yych <= '=') goto yy56; +yy55: YYCURSOR = YYMARKER; switch(yyaccept){ - case 1: goto yy45; - case 0: goto yy41; + case 0: goto yy47; } -yy49: yyaccept = 1; - yych = *(YYMARKER = ++YYCURSOR); - if(yych == '>') goto yy45; - if(yych == '^') goto yy45; - goto yy57; -yy50: ++YYCURSOR; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; -yy51: if(yybm[0+yych] & 64) goto yy50; - if(yych <= '!') goto yy49; - if(yych <= '"') goto yy52; - if(yych <= ']') goto yy44; - goto yy48; -yy52: yych = *++YYCURSOR; - if(yych == ' ') goto yy53; - if(yych != '>') goto yy47; -yy53: yych = *++YYCURSOR; -yy54: -#line 291 - { - YYCURSOR--; - para_start = NULL; - smart_str_copyl(&ctx->para, start + 1, YYCURSOR - start - 2); -#ifdef SCANNER_DEBUG - printf("PARA(%s)\n", ctx->para.c); -#endif - tag_arg(ctx PLS_CC); - arg_start = YYCURSOR; - GO(STATE_NEXT_ARG); - NEXT; - } -yy55: yych = *++YYCURSOR; - if(yych == ' ') goto yy58; - if(yych == '>') goto yy58; - goto yy48; -yy56: ++YYCURSOR; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; -yy57: if(yybm[0+yych] & 128) goto yy56; - if(yych <= '=') goto yy55; - goto yy48; -yy58: yych = *++YYCURSOR; - goto yy54; +yy56: yych = *++YYCURSOR; +yy57: +#line 301 + { HANDLE_VAL(1); STATE = STATE_NEXT_ARG; continue; } } -#line 320 +#line 304 break; - } } - -#define PRESERVE(s) \ - size_t n = ctx->work.len - (s - ctx->work.c); \ - memmove(ctx->work.c, s, n + 1); \ - ctx->work.len = n - -finish: - if (ctx->arg.c) - smart_str_copyl(&ctx->c_arg, ctx->arg.c, ctx->arg.len); - else - smart_str_free(&ctx->c_arg); - if (ctx->tag.c) - smart_str_copyl(&ctx->c_tag, ctx->tag.c, ctx->tag.len); - else - smart_str_free(&ctx->c_tag); - - if (ctx->state >= 2) { 
- if (para_start) { - PRESERVE(para_start); - ctx->state = 4; - } else { - if (arg_start) { PRESERVE(arg_start); } - ctx->state = 2; - } - } else if (tag_start) { - PRESERVE(tag_start); - ctx->state = 1; - } else { - ctx->state = 0; - if (start) smart_str_appendl(&ctx->result, start, YYCURSOR - start); - smart_str_free(&ctx->work); } -#ifdef SCANNER_DEBUG - if (ctx->work.c) { - printf("PRESERVING %s'\n", ctx->work.c); - } +stop: +#ifdef SCANNER_DEBUG + printf("stopped in state %d at pos %d (%d:%c)\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR); #endif + + MOVE_TO_CTX(tag); + MOVE_TO_CTX(arg); + + rest = YYLIMIT - start; + + memmove(ctx->buf.c, start, rest); + ctx->buf.c[rest] = '\0'; + ctx->buf.len = rest; } + char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char *value, size_t *newlen) { - smart_str str = {0,0}; char *ret; + url_adapt_state_ex_t *ctx; BLS_FETCH(); - smart_str_sets(&BG(url_adapt_state).name, name); - smart_str_sets(&BG(url_adapt_state).value, value); - str.c = (char *) src; - str.len = srclen; - mainloop(&BG(url_adapt_state), &str); + ctx = &BG(url_adapt_state_ex); - *newlen = BG(url_adapt_state).result.len; + smart_str_sets(&ctx->q_name, name); + smart_str_sets(&ctx->q_value, value); + mainloop(ctx, src, srclen); -#ifdef SCANNER_DEBUG - printf("(%d)NEW(%d): %s'\n", srclen, BG(url_adapt_state).result.len, BG(url_adapt_state).result.c); -#endif + *newlen = ctx->result.len; -#if 1 - ret = BG(url_adapt_state).result.c; - BG(url_adapt_state).result.c = NULL; + if (ctx->result.len == 0) { + return strdup(""); + } + ret = ctx->result.c; + ctx->result.c = NULL; + ctx->result.len = ctx->result.a = 0; return ret; -#else - return strdup(BG(url_adapt_state).result.c); -#endif } PHP_RINIT_FUNCTION(url_scanner) { + url_adapt_state_ex_t *ctx; BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); - memset(&BG(url_adapt_state), 0, sizeof(BG(url_adapt_state))); + memset(ctx, 0, sizeof(*ctx)); return SUCCESS; } 
PHP_RSHUTDOWN_FUNCTION(url_scanner) { + url_adapt_state_ex_t *ctx; BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); - smart_str_free(&BG(url_adapt_state).result); - smart_str_free(&BG(url_adapt_state).work); - smart_str_free(&BG(url_adapt_state).c_tag); - smart_str_free(&BG(url_adapt_state).c_arg); - smart_str_free(&BG(url_adapt_state).para); + smart_str_free(&ctx->result); + smart_str_free(&ctx->buf); + smart_str_free(&ctx->c_tag); + smart_str_free(&ctx->c_arg); + smart_str_free(&ctx->val); return SUCCESS; } diff --git a/ext/standard/url_scanner_ex.h b/ext/standard/url_scanner_ex.h index e207a4b431..059552b3de 100644 --- a/ext/standard/url_scanner_ex.h +++ b/ext/standard/url_scanner_ex.h @@ -30,17 +30,26 @@ typedef struct { size_t a; } smart_str; + typedef struct { - smart_str c_arg; + /* Used by the mainloop of the scanner */ + smart_str tag; /* read only */ + smart_str arg; /* read only */ + smart_str val; + smart_str buf; + + /* Dito, but they are used only for preserving data across calls */ smart_str c_tag; - smart_str arg; - smart_str tag; - smart_str para; - smart_str work; + smart_str c_arg; + + /* The result buffer */ smart_str result; + + /* The data which is appended to each relative URL */ + smart_str q_name; + smart_str q_value; + int state; - smart_str name; - smart_str value; } url_adapt_state_ex_t; #endif diff --git a/ext/standard/url_scanner_ex.re b/ext/standard/url_scanner_ex.re index ea4f24ad21..790f38ad62 100644 --- a/ext/standard/url_scanner_ex.re +++ b/ext/standard/url_scanner_ex.re @@ -35,8 +35,6 @@ #define url_adapt_ext url_adapt_ext_ex #define url_scanner url_scanner_ex -#define url_adapt_state url_adapt_state_ex -#define url_adapt_state_t url_adapt_state_ex_t static inline void smart_str_append(smart_str *dest, smart_str *src) { @@ -88,15 +86,14 @@ static inline void smart_str_setl(smart_str *dest, const char *src, size_t len) dest->c = (char *) src; } -static inline void smart_str_appends(smart_str *dest, const char *src) -{ - 
smart_str_appendl(dest, src, strlen(src)); -} +#define smart_str_appends(dest, src) smart_str_appendl(dest, src, sizeof(src)-1) +#if 0 static inline void smart_str_copys(smart_str *dest, const char *src) { smart_str_copyl(dest, src, strlen(src)); } +#endif static inline void smart_str_sets(smart_str *dest, const char *src) { @@ -105,9 +102,9 @@ static inline void smart_str_sets(smart_str *dest, const char *src) static inline void attach_url(smart_str *url, smart_str *name, smart_str *val, const char *separator) { - if (strchr(url->c, ':')) return; + if (memchr(url->c, ':', url->len)) return; - if (strchr(url->c, '?')) + if (memchr(url->c, '?', url->len)) smart_str_appendl(url, separator, 1); else smart_str_appendl(url, "?", 1); @@ -129,12 +126,14 @@ struct php_tag_arg { static struct php_tag_arg check_tag_arg[] = { TAG_ARG_ENTRY(a, href) TAG_ARG_ENTRY(area, href) - TAG_ARG_ENTRY(frame, source) + TAG_ARG_ENTRY(frame, src) TAG_ARG_ENTRY(img, src) + TAG_ARG_ENTRY(input, src) + TAG_ARG_ENTRY(form, fake_entry_for_passing_on_form_tag) {0} }; -static inline void tag_arg(url_adapt_state_t *ctx PLS_DC) +static inline void tag_arg(url_adapt_state_ex_t *ctx PLS_DC) { char f = 0; int i; @@ -151,261 +150,225 @@ static inline void tag_arg(url_adapt_state_t *ctx PLS_DC) smart_str_appends(&ctx->result, "\""); if (f) { - attach_url(&ctx->para, &ctx->name, &ctx->value, PG(arg_separator)); + attach_url(&ctx->val, &ctx->q_name, &ctx->q_value, PG(arg_separator)); } - smart_str_append(&ctx->result, &ctx->para); + smart_str_append(&ctx->result, &ctx->val); smart_str_appends(&ctx->result, "\""); } -/*!re2c -all = [\000-\377]; -*/ - -#define NEXT continue - -#define COPY_ALL \ - smart_str_appendl(&ctx->result, start, YYCURSOR - start); \ - start = NULL; \ - NEXT - -#define YYFILL(n) goto finish -#define YYCTYPE unsigned char -#define YYLIMIT endptr -#define YYCURSOR cursor -#define YYMARKER marker - -#define HANDLE_FORM \ - if (ctx->tag.len == 4 && strncasecmp(ctx->tag.c, "form", 
ctx->tag.len) == 0) { \ - smart_str_appends(&ctx->result, "><INPUT TYPE=HIDDEN NAME=\""); \ - smart_str_append(&ctx->result, &ctx->name); \ - smart_str_appends(&ctx->result, "\" VALUE=\""); \ - smart_str_append(&ctx->result, &ctx->value); \ - smart_str_appends(&ctx->result, "\""); \ - } - -#define GO(n) ctx->state = n - enum { STATE_PLAIN, STATE_TAG, STATE_NEXT_ARG, STATE_ARG, - STATE_PARA + STATE_BEFORE_VAL, + STATE_VAL }; -static void mainloop(url_adapt_state_t *ctx, smart_str *newstuff) +#define YYFILL(n) goto stop +#define YYCTYPE char +#define YYCURSOR xp +#define YYLIMIT end +#define YYMARKER q +#define STATE ctx->state + +#define PASSTHRU() {\ + smart_str_appendl(&ctx->result, start, YYCURSOR - start); \ +} + +#define HANDLE_FORM() {\ + if (ctx->tag.len == 4 && strncasecmp(ctx->tag.c, "form", 4) == 0) {\ + smart_str_appends(&ctx->result, "<INPUT TYPE=HIDDEN NAME=\""); \ + smart_str_append(&ctx->result, &ctx->q_name); \ + smart_str_appends(&ctx->result, "\" VALUE=\""); \ + smart_str_append(&ctx->result, &ctx->q_value); \ + smart_str_appends(&ctx->result, "\">"); \ + } \ +} + +/* + * HANDLE_TAG copies the HTML Tag and checks whether we + * have that tag in our table. If we might modify it, + * we continue to scan the tag, otherwise we simply copy the complete + * HTML stuff to the result buffer. + */ + +#define HANDLE_TAG() {\ + int __ok = 0; \ + int i; \ + smart_str_setl(&ctx->tag, start, YYCURSOR - start); \ + for (i = 0; check_tag_arg[i].tag; i++) { \ + if (ctx->tag.len == check_tag_arg[i].taglen \ + && strncasecmp(ctx->tag.c, check_tag_arg[i].tag, ctx->tag.len) == 0) { \ + __ok = 1; \ + break; \ + } \ + } \ + STATE = __ok ? STATE_NEXT_ARG : STATE_PLAIN; \ +} + +#define HANDLE_ARG() {\ + smart_str_setl(&ctx->arg, start, YYCURSOR - start); \ +} +#define HANDLE_VAL(quotes) {\ + smart_str_copyl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2); \ + tag_arg(ctx PLS_CC); \ +} + +/* + * Since arg/tag are read-only during the mainloop, we do not need + * to copy them. 
We need those variables across multiple calls + * to url_adapt() though, but they point to a private buffer. So we + * copy them before leaving the mainloop() and restore them at + * the beginning. + */ + +#define MOVE_TO_CTX(X) \ + if (ctx->X.c) \ + smart_str_copyl(&ctx->c_##X, ctx->X.c, ctx->X.len); \ + else \ + smart_str_free(&ctx->c_##X) + +#define FETCH_FROM_CTX(X) \ + smart_str_setl(&ctx->X, ctx->c_##X.c, ctx->c_##X.len) + +static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen) { - char *para_start, *arg_start, *tag_start; - char *start = NULL; - char *cursor; - char *marker; - char *endptr; + char *end, *q; + char *xp; + char *start; + int rest; PLS_FETCH(); - arg_start = para_start = tag_start = NULL; - smart_str_append(&ctx->work, newstuff); - smart_str_free(&ctx->result); + FETCH_FROM_CTX(arg); + FETCH_FROM_CTX(tag); - smart_str_setl(&ctx->arg, ctx->c_arg.c, ctx->c_arg.len); - smart_str_setl(&ctx->tag, ctx->c_tag.c, ctx->c_tag.len); + smart_str_appendl(&ctx->buf, newdata, newlen); + + YYCURSOR = ctx->buf.c; + YYLIMIT = ctx->buf.c + ctx->buf.len; - cursor = ctx->work.c; - endptr = ctx->work.c + ctx->work.len; +/*!re2c +any = [\000-\377]; +alpha = [a-zA-Z]; +*/ - while (YYCURSOR < YYLIMIT) { + while(1) { start = YYCURSOR; - #ifdef SCANNER_DEBUG - printf("state %d:%s'\n", ctx->state, YYCURSOR); + printf("state %d at %s\n", STATE, YYCURSOR); #endif - switch (ctx->state) { - + switch(STATE) { + case STATE_PLAIN: /*!re2c - "<" { tag_start = YYCURSOR; GO(STATE_TAG); COPY_ALL;} - (all\[<])+ { COPY_ALL; } + [<] { PASSTHRU(); STATE = STATE_TAG; continue; } + (any\[<]) { PASSTHRU(); continue; } */ - break; - + break; + case STATE_TAG: /*!re2c - [a-zA-Z]+ [ >] { - YYCURSOR--; - arg_start = YYCURSOR; - smart_str_setl(&ctx->tag, start, YYCURSOR - start); -#ifdef SCANNER_DEBUG - printf("TAG(%s)\n", ctx->tag.c); -#endif - GO(STATE_NEXT_ARG); - COPY_ALL; - } - all { - YYCURSOR--; - GO(STATE_PLAIN); - tag_start = NULL; - NEXT; - } + 
alpha+ { HANDLE_TAG() /* Sets STATE */; PASSTHRU(); continue; } + any { PASSTHRU(); continue; } */ - break; - + break; + case STATE_NEXT_ARG: /*!re2c - [ ]+ { - GO(STATE_ARG); - NEXT; - } - ">" { - HANDLE_FORM; - GO(STATE_PLAIN); - tag_start = NULL; - COPY_ALL; - } + ">" { PASSTHRU(); HANDLE_FORM(); STATE = STATE_PLAIN; continue; } + [ \n] { PASSTHRU(); continue; } + alpha { YYCURSOR--; STATE = STATE_ARG; continue; } + any { PASSTHRU(); continue; } */ - break; + break; case STATE_ARG: - smart_str_appendl(&ctx->result, " ", 1); /*!re2c - [a-zA-Z]+ [ ]* "=" [ ]* { - char *p; + alpha+ { PASSTHRU(); HANDLE_ARG(); STATE = STATE_BEFORE_VAL; continue; } + any { PASSTHRU(); STATE = STATE_NEXT_ARG; continue; } +*/ - for (p = start; isalpha(*p); p++); - smart_str_setl(&ctx->arg, start, p - start); -#ifdef SCANNER_DEBUG - printf("ARG(%s)\n", ctx->arg.c); -#endif - para_start = YYCURSOR; - ctx->state++; - COPY_ALL; - } - (all\[ =>])* { - arg_start = YYCURSOR; - ctx->state--; - COPY_ALL; - } + case STATE_BEFORE_VAL: +/*!re2c + [ ]* "=" [ ]* { PASSTHRU(); STATE = STATE_VAL; continue; } + any { YYCURSOR--; STATE = STATE_NEXT_ARG; continue; } */ break; - case STATE_PARA: + case STATE_VAL: /*!re2c - ["] (all\[^>"])* ["] [ >] { - YYCURSOR--; - para_start = NULL; - smart_str_copyl(&ctx->para, start + 1, YYCURSOR - start - 2); -#ifdef SCANNER_DEBUG - printf("PARA(%s)\n", ctx->para.c); -#endif - tag_arg(ctx PLS_CC); - arg_start = YYCURSOR; - GO(STATE_NEXT_ARG); - NEXT; - } - (all\[^> ])+ [ >] { - YYCURSOR--; - para_start = NULL; - smart_str_copyl(&ctx->para, start, YYCURSOR - start); -#ifdef SCANNER_DEBUG - printf("PARA(%s)\n", ctx->para.c); -#endif - tag_arg(ctx PLS_CC); - arg_start = YYCURSOR; - GO(STATE_NEXT_ARG); - NEXT; - } - all { - YYCURSOR--; - ctx->state = 2; - NEXT; - } + ["] (any\[">])+ ["] { HANDLE_VAL(1); STATE = STATE_NEXT_ARG; continue; } + (any\[ \n>"])+ { HANDLE_VAL(0); STATE = STATE_NEXT_ARG; continue; } + any { PASSTHRU(); STATE = STATE_NEXT_ARG; continue; } */ 
break; - } } - -#define PRESERVE(s) \ - size_t n = ctx->work.len - (s - ctx->work.c); \ - memmove(ctx->work.c, s, n + 1); \ - ctx->work.len = n - -finish: - if (ctx->arg.c) - smart_str_copyl(&ctx->c_arg, ctx->arg.c, ctx->arg.len); - else - smart_str_free(&ctx->c_arg); - if (ctx->tag.c) - smart_str_copyl(&ctx->c_tag, ctx->tag.c, ctx->tag.len); - else - smart_str_free(&ctx->c_tag); - - if (ctx->state >= 2) { - if (para_start) { - PRESERVE(para_start); - ctx->state = 4; - } else { - if (arg_start) { PRESERVE(arg_start); } - ctx->state = 2; - } - } else if (tag_start) { - PRESERVE(tag_start); - ctx->state = 1; - } else { - ctx->state = 0; - if (start) smart_str_appendl(&ctx->result, start, YYCURSOR - start); - smart_str_free(&ctx->work); } -#ifdef SCANNER_DEBUG - if (ctx->work.c) { - printf("PRESERVING %s'\n", ctx->work.c); - } +stop: +#ifdef SCANNER_DEBUG + printf("stopped in state %d at pos %d (%d:%c)\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR); #endif + + MOVE_TO_CTX(tag); + MOVE_TO_CTX(arg); + + rest = YYLIMIT - start; + + memmove(ctx->buf.c, start, rest); + ctx->buf.c[rest] = '\0'; + ctx->buf.len = rest; } + char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char *value, size_t *newlen) { - smart_str str = {0,0}; char *ret; + url_adapt_state_ex_t *ctx; BLS_FETCH(); - smart_str_sets(&BG(url_adapt_state).name, name); - smart_str_sets(&BG(url_adapt_state).value, value); - str.c = (char *) src; - str.len = srclen; - mainloop(&BG(url_adapt_state), &str); + ctx = &BG(url_adapt_state_ex); - *newlen = BG(url_adapt_state).result.len; + smart_str_sets(&ctx->q_name, name); + smart_str_sets(&ctx->q_value, value); + mainloop(ctx, src, srclen); -#ifdef SCANNER_DEBUG - printf("(%d)NEW(%d): %s'\n", srclen, BG(url_adapt_state).result.len, BG(url_adapt_state).result.c); -#endif + *newlen = ctx->result.len; -#if 1 - ret = BG(url_adapt_state).result.c; - BG(url_adapt_state).result.c = NULL; + if (ctx->result.len == 0) { + return strdup(""); + } + 
ret = ctx->result.c; + ctx->result.c = NULL; + ctx->result.len = ctx->result.a = 0; return ret; -#else - return strdup(BG(url_adapt_state).result.c); -#endif } PHP_RINIT_FUNCTION(url_scanner) { + url_adapt_state_ex_t *ctx; BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); - memset(&BG(url_adapt_state), 0, sizeof(BG(url_adapt_state))); + memset(ctx, 0, sizeof(*ctx)); return SUCCESS; } PHP_RSHUTDOWN_FUNCTION(url_scanner) { + url_adapt_state_ex_t *ctx; BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); - smart_str_free(&BG(url_adapt_state).result); - smart_str_free(&BG(url_adapt_state).work); - smart_str_free(&BG(url_adapt_state).c_tag); - smart_str_free(&BG(url_adapt_state).c_arg); - smart_str_free(&BG(url_adapt_state).para); + smart_str_free(&ctx->result); + smart_str_free(&ctx->buf); + smart_str_free(&ctx->c_tag); + smart_str_free(&ctx->c_arg); + smart_str_free(&ctx->val); return SUCCESS; } -- 2.50.1