From ef13ab5750202d17a138566f6e4e9ffca84eccf5 Mon Sep 17 00:00:00 2001 From: Sascha Schumann Date: Wed, 4 Jul 2001 15:30:21 +0000 Subject: [PATCH] Improve url scanner speed (up to 40% for large chunks of data) and handle some corner cases better. The scanner has been changed to the format as proposed in "RE2C - A More Versatile Scanner Generator" by Cowan et al. --- ext/session/php_session.h | 6 +- ext/session/session.c | 10 + ext/standard/output.c | 5 + ext/standard/url_scanner_ex.c | 682 ++++++++++++++++++++------------- ext/standard/url_scanner_ex.h | 2 + ext/standard/url_scanner_ex.re | 159 +++++--- main/output.c | 5 + 7 files changed, 535 insertions(+), 334 deletions(-) diff --git a/ext/session/php_session.h b/ext/session/php_session.h index 97623915b5..90ee4cfa86 100644 --- a/ext/session/php_session.h +++ b/ext/session/php_session.h @@ -150,9 +150,11 @@ typedef struct ps_serializer_struct { #ifdef TRANS_SID void session_adapt_uris(const char *, size_t, char **, size_t *); void session_adapt_url(const char *, size_t, char **, size_t *); +void session_adapt_flush(int (*)(const char *, uint)); #else -#define session_adapt_uris(a,b,c,d) -#define session_adapt_url(a,b,c,d) +#define session_adapt_uris(a,b,c,d) do { } while(0) +#define session_adapt_url(a,b,c,d) do { } while(0) +#define session_adapt_flush(a) do { } while(0) #endif void php_set_session_var(char *name, size_t namelen, zval *state_val,HashTable *var_hash PSLS_DC); diff --git a/ext/session/session.c b/ext/session/session.c index 81a4eb0a81..8067b04a7d 100644 --- a/ext/session/session.c +++ b/ext/session/session.c @@ -1326,6 +1326,16 @@ void session_adapt_url(const char *url, size_t urllen, char **new, size_t *newle if (PS(define_sid) && PS(nr_open_sessions) > 0) *new = url_adapt_single_url(url, urllen, PS(session_name), PS(id), newlen); } + +void session_adapt_flush(int (*write)(const char *, uint)) +{ + char *str; + size_t len; + + str = url_adapt_flush(&len); + if (str) write(str, len); +} + #endif /* {{{ proto void session_unset(void) diff --git a/ext/standard/output.c b/ext/standard/output.c index c29ef41900..1509cb277b 100644 --- a/ext/standard/output.c +++ b/ext/standard/output.c @@ -254,10 +254,15 @@ PHPAPI void php_end_ob_buffer(zend_bool send_buffer, zend_bool just_flush) PHPAPI void php_end_ob_buffers(zend_bool send_buffer) { OLS_FETCH(); + BLS_FETCH(); while (OG(nesting_level)!=0) { php_end_ob_buffer(send_buffer, 0); } + + if (send_buffer && BG(use_trans_sid)) { + session_adapt_flush(OG(php_header_write)); + } } /* }}} */ diff --git a/ext/standard/url_scanner_ex.c b/ext/standard/url_scanner_ex.c index 835e37f7f9..d8e49e74f8 100644 --- a/ext/standard/url_scanner_ex.c +++ b/ext/standard/url_scanner_ex.c @@ -1,5 +1,5 @@ -/* Generated by re2c 0.5 on Wed Apr 4 19:29:46 2001 */ -#line 1 "/usr/src/web/php/php4/ext/standard/url_scanner_ex.re" +/* Generated by re2c 0.5 on Wed Jul 4 17:05:51 2001 */ +#line 1 "/home/sas/src/php4/ext/standard/url_scanner_ex.re" /* +----------------------------------------------------------------------+ | PHP version 4.0 | @@ -41,8 +41,6 @@ #include "php_smart_str.h" -/* {{{ PHP_INI_MH - */ static PHP_INI_MH(OnUpdateTags) { url_adapt_state_ex_t *ctx; @@ -86,38 +84,100 @@ static PHP_INI_MH(OnUpdateTags) return SUCCESS; } -/* }}} */ PHP_INI_BEGIN() STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=fakeentry", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals) PHP_INI_END() -/* {{{ append_modified_url - */ +#line 94 + + +#define YYFILL(n) goto done +#define YYCTYPE unsigned char +#define YYCURSOR p +#define YYLIMIT q +#define YYMARKER r + static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *name, smart_str *val, const char *separator) { - register const char *p, *q; + register const char *p, *q, *r; const char *bash = NULL; const char *sep = "?"; - q = url->c + url->len; - - for (p = url->c; p < q; p++) { - switch(*p) { - case ':': - smart_str_append(dest, url); - return; - case '?': - sep = separator; - break; - case '#': - bash = p; - break; - } - } + q = (p = url->c) + url->len; +scan: +{ + YYCTYPE yych; + unsigned int yyaccept; + static unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 128, 128, 128, 128, 0, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + goto yy0; +yy1: ++YYCURSOR; +yy0: + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yybm[0+yych] & 128) goto yy8; + if(yych <= '9') goto yy6; + if(yych >= ';') goto yy4; +yy2: yych = *++YYCURSOR; +yy3: +#line 112 + { smart_str_append(dest, url); return; } +yy4: yych = *++YYCURSOR; +yy5: +#line 113 + { sep = separator; goto done; } +yy6: yych = *++YYCURSOR; +yy7: +#line 114 + { bash = p; goto done; } +yy8: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy9: if(yybm[0+yych] & 128) goto yy8; +yy10: +#line 115 + { goto scan; } +} +#line 116 + +done: + /* Don't modify URLs of the format "#mark" */ - if (bash - url->c == 0) { + if (bash && bash - url->c == 0) { smart_str_append(dest, url); return; } @@ -135,7 +195,12 @@ static inline void append_modified_url(smart_str *url, smart_str *dest, smart_st if (bash) smart_str_appendl(dest, bash, q - bash); } -/* }}} */ + +#undef YYFILL +#undef YYCTYPE +#undef YYCURSOR +#undef YYLIMIT +#undef YYMARKER static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC) { @@ -154,7 +219,7 @@ static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC) } enum { - STATE_PLAIN, + STATE_PLAIN = 0, STATE_TAG, STATE_NEXT_ARG, STATE_ARG, @@ -227,8 +292,6 @@ static inline void handle_val(STD_PARA, char quotes, char type) #define scdebug(x) #endif -/* {{{ mainloop - */ static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen) { char *end, *q; @@ -242,38 +305,81 @@ static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size YYCURSOR = ctx->buf.c; YYLIMIT = ctx->buf.c + ctx->buf.len; -#line 238 + switch (STATE) { + case STATE_PLAIN: goto state_plain; + case STATE_TAG: goto state_tag; + case STATE_NEXT_ARG: goto state_next_arg; + case STATE_ARG: goto state_arg; + case STATE_BEFORE_VAL: goto state_before_val; + case STATE_VAL: goto state_val; + } + +state_plain_begin: + STATE = STATE_PLAIN; - while(1) { - start = YYCURSOR; - scdebug(("state %d at %s\n", STATE, YYCURSOR)); - switch(STATE) { - - case STATE_PLAIN: +state_plain: + start = YYCURSOR; { YYCTYPE yych; unsigned int yyaccept; - goto yy0; -yy1: ++YYCURSOR; -yy0: + static unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + goto yy11; +yy12: ++YYCURSOR; +yy11: if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; - if(yych != '<') goto yy4; -yy2: yych = *++YYCURSOR; -yy3: -#line 247 - { passthru(STD_ARGS); STATE = STATE_TAG; continue; } -yy4: yych = *++YYCURSOR; -yy5: -#line 248 - { passthru(STD_ARGS); continue; } + if(yybm[0+yych] & 128) goto yy15; +yy13: yych = *++YYCURSOR; +yy14: +#line 264 + { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; } +yy15: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy16: if(yybm[0+yych] & 128) goto yy15; +yy17: +#line 265 + { passthru(STD_ARGS); goto state_plain; } } -#line 249 +#line 266 - break; - - case STATE_TAG: + +state_tag: + start = YYCURSOR; { YYCTYPE yych; unsigned int yyaccept; @@ -311,84 +417,127 @@ yy5: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - goto yy6; -yy7: ++YYCURSOR; -yy6: + goto yy18; +yy19: ++YYCURSOR; +yy18: if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych <= '@') goto yy10; - if(yych <= 'Z') goto yy8; - if(yych <= '`') goto yy10; - if(yych >= '{') goto yy10; -yy8: yych = *++YYCURSOR; - goto yy13; -yy9: -#line 254 - { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); continue; } -yy10: yych = *++YYCURSOR; -yy11: -#line 255 - { passthru(STD_ARGS); STATE = STATE_PLAIN; continue; } -yy12: ++YYCURSOR; + if(yych <= '@') goto yy22; + if(yych <= 'Z') goto yy20; + if(yych <= '`') goto yy22; + if(yych >= '{') goto yy22; +yy20: yych = *++YYCURSOR; + goto yy25; +yy21: +#line 271 + { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; } +yy22: yych = *++YYCURSOR; +yy23: +#line 272 + { passthru(STD_ARGS); goto state_plain_begin; } +yy24: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy13: if(yybm[0+yych] & 128) goto yy12; - goto yy9; +yy25: if(yybm[0+yych] & 128) goto yy24; + goto yy21; } -#line 256 +#line 273 - break; - - case STATE_NEXT_ARG: + +state_next_arg_begin: + STATE = STATE_NEXT_ARG; + +state_next_arg: + start = YYCURSOR; { YYCTYPE yych; unsigned int yyaccept; - goto yy14; -yy15: ++YYCURSOR; -yy14: - if(YYLIMIT == YYCURSOR) YYFILL(1); + static unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + goto yy26; +yy27: ++YYCURSOR; +yy26: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; if(yych <= '='){ - if(yych <= '\n'){ - if(yych <= '\t') goto yy22; - goto yy18; + if(yych <= '\v'){ + if(yych <= '\b') goto yy34; + goto yy30; } else { - if(yych == ' ') goto yy18; - goto yy22; + if(yych == ' ') goto yy30; + goto yy34; } } else { if(yych <= 'Z'){ - if(yych <= '>') goto yy16; - if(yych <= '@') goto yy22; - goto yy20; + if(yych <= '>') goto yy28; + if(yych <= '@') goto yy34; + goto yy32; } else { - if(yych <= '`') goto yy22; - if(yych <= 'z') goto yy20; - goto yy22; + if(yych <= '`') goto yy34; + if(yych <= 'z') goto yy32; + goto yy34; } } -yy16: yych = *++YYCURSOR; -yy17: -#line 261 - { passthru(STD_ARGS); handle_form(STD_ARGS); STATE = STATE_PLAIN; continue; } -yy18: yych = *++YYCURSOR; -yy19: -#line 262 - { passthru(STD_ARGS); continue; } -yy20: yych = *++YYCURSOR; -yy21: -#line 263 - { YYCURSOR--; STATE = STATE_ARG; continue; } -yy22: yych = *++YYCURSOR; -yy23: -#line 264 - { passthru(STD_ARGS); STATE = STATE_PLAIN; continue; } +yy28: yych = *++YYCURSOR; +yy29: +#line 281 + { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; } +yy30: yych = *++YYCURSOR; + goto yy37; +yy31: +#line 282 + { passthru(STD_ARGS); goto state_next_arg; } +yy32: yych = *++YYCURSOR; +yy33: +#line 283 + { --YYCURSOR; STATE = STATE_ARG; goto state_arg; } +yy34: yych = *++YYCURSOR; +yy35: +#line 284 + { passthru(STD_ARGS); goto state_plain_begin; } +yy36: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy37: if(yybm[0+yych] & 128) goto yy36; + goto yy31; } -#line 265 +#line 285 - break; - case STATE_ARG: +state_arg: + start = YYCURSOR; { YYCTYPE yych; unsigned int yyaccept; @@ -426,34 +575,35 @@ yy23: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - goto yy24; -yy25: ++YYCURSOR; -yy24: + goto yy38; +yy39: ++YYCURSOR; +yy38: if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych <= '@') goto yy28; - if(yych <= 'Z') goto yy26; - if(yych <= '`') goto yy28; - if(yych >= '{') goto yy28; -yy26: yych = *++YYCURSOR; - goto yy31; -yy27: -#line 270 - { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; continue; } -yy28: yych = *++YYCURSOR; -yy29: -#line 271 - { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; } -yy30: ++YYCURSOR; + if(yych <= '@') goto yy42; + if(yych <= 'Z') goto yy40; + if(yych <= '`') goto yy42; + if(yych >= '{') goto yy42; +yy40: yych = *++YYCURSOR; + goto yy45; +yy41: +#line 290 + { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; } +yy42: yych = *++YYCURSOR; +yy43: +#line 291 + { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; } +yy44: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy31: if(yybm[0+yych] & 128) goto yy30; - goto yy27; +yy45: if(yybm[0+yych] & 128) goto yy44; + goto yy41; } -#line 272 +#line 292 - case STATE_BEFORE_VAL: +state_before_val: + start = YYCURSOR; { YYCTYPE yych; unsigned int yyaccept; @@ -491,184 +641,185 @@ yy31: if(yybm[0+yych] & 128) goto yy30; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - goto yy32; -yy33: ++YYCURSOR; -yy32: + goto yy46; +yy47: ++YYCURSOR; +yy46: if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych == ' ') goto yy34; - if(yych == '=') goto yy36; - goto yy38; -yy34: yyaccept = 0; + if(yych == ' ') goto yy48; + if(yych == '=') goto yy50; + goto yy52; +yy48: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); - if(yych == ' ') goto yy41; - if(yych == '=') goto yy39; -yy35: -#line 277 - { YYCURSOR--; STATE = STATE_NEXT_ARG; continue; } -yy36: yych = *++YYCURSOR; - goto yy40; -yy37: -#line 276 - { passthru(STD_ARGS); STATE = STATE_VAL; continue; } -yy38: yych = *++YYCURSOR; - goto yy35; -yy39: ++YYCURSOR; + if(yych == ' ') goto yy55; + if(yych == '=') goto yy53; +yy49: +#line 298 + { --YYCURSOR; goto state_next_arg_begin; } +yy50: yych = *++YYCURSOR; + goto yy54; +yy51: +#line 297 + { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; } +yy52: yych = *++YYCURSOR; + goto yy49; +yy53: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy40: if(yybm[0+yych] & 128) goto yy39; - goto yy37; -yy41: ++YYCURSOR; +yy54: if(yybm[0+yych] & 128) goto yy53; + goto yy51; +yy55: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy42: if(yych == ' ') goto yy41; - if(yych == '=') goto yy39; -yy43: YYCURSOR = YYMARKER; +yy56: if(yych == ' ') goto yy55; + if(yych == '=') goto yy53; +yy57: YYCURSOR = YYMARKER; switch(yyaccept){ - case 0: goto yy35; + case 0: goto yy49; } } -#line 278 +#line 299 + - break; - case STATE_VAL: +state_val: + start = YYCURSOR; { YYCTYPE yych; unsigned int yyaccept; static unsigned char yybm[] = { - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 192, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 192, 240, 64, 240, 240, 240, 240, 144, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 0, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, - 240, 240, 240, 240, 240, 240, 240, 240, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 192, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 192, 224, 64, 224, 224, 224, 224, 128, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 0, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, }; - goto yy44; -yy45: ++YYCURSOR; -yy44: - if((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + goto yy58; +yy59: ++YYCURSOR; +yy58: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; if(yych <= '!'){ if(yych <= '\n'){ - if(yych <= '\t') goto yy50; - goto yy51; + if(yych <= '\t') goto yy63; + goto yy65; } else { - if(yych == ' ') goto yy51; - goto yy50; + if(yych == ' ') goto yy65; + goto yy63; } } else { if(yych <= '\''){ - if(yych <= '"') goto yy46; - if(yych <= '&') goto yy50; - goto yy48; + if(yych <= '"') goto yy60; + if(yych <= '&') goto yy63; + goto yy62; } else { - if(yych == '>') goto yy51; - goto yy50; + if(yych == '>') goto yy65; + goto yy63; } } -yy46: yyaccept = 0; +yy60: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); - if(yych != '>') goto yy63; -yy47: -#line 286 - { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; } -yy48: yyaccept = 1; + if(yych != '>') goto yy74; +yy61: +#line 308 + { passthru(STD_ARGS); goto state_next_arg_begin; } +yy62: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); - goto yy55; -yy49: -#line 285 - { handle_val(STD_ARGS, 0, '"'); STATE = STATE_NEXT_ARG; continue; } -yy50: yych = *++YYCURSOR; - goto yy53; -yy51: yych = *++YYCURSOR; - goto yy47; -yy52: ++YYCURSOR; + if(yych == '>') goto yy61; + goto yy69; +yy63: yych = *++YYCURSOR; + goto yy67; +yy64: +#line 307 + { handle_val(STD_ARGS, 0, '"'); goto state_next_arg_begin; } +yy65: yych = *++YYCURSOR; + goto yy61; +yy66: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy53: if(yybm[0+yych] & 16) goto yy52; - goto yy49; -yy54: yyaccept = 1; - YYMARKER = ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy55: if(yybm[0+yych] & 32) goto yy54; - if(yych <= '&') goto yy58; - if(yych >= '(') goto yy49; -yy56: yych = *++YYCURSOR; - if(yybm[0+yych] & 16) goto yy52; -yy57: -#line 284 - { handle_val(STD_ARGS, 1, '\''); STATE = STATE_NEXT_ARG; continue; } -yy58: ++YYCURSOR; +yy67: if(yybm[0+yych] & 32) goto yy66; + goto yy64; +yy68: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy59: if(yybm[0+yych] & 64) goto yy58; - if(yych <= '=') goto yy61; -yy60: YYCURSOR = YYMARKER; +yy69: if(yybm[0+yych] & 64) goto yy68; + if(yych <= '=') goto yy71; +yy70: YYCURSOR = YYMARKER; switch(yyaccept){ - case 1: goto yy49; - case 0: goto yy47; + case 0: goto yy61; } -yy61: yych = *++YYCURSOR; - goto yy57; -yy62: ++YYCURSOR; +yy71: yych = *++YYCURSOR; +yy72: +#line 306 + { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; } +yy73: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy63: if(yybm[0+yych] & 128) goto yy62; - if(yych >= '>') goto yy60; -yy64: yych = *++YYCURSOR; -yy65: -#line 283 - { handle_val(STD_ARGS, 1, '"'); STATE = STATE_NEXT_ARG; continue; } +yy74: if(yybm[0+yych] & 128) goto yy73; + if(yych >= '>') goto yy70; +yy75: yych = *++YYCURSOR; +yy76: +#line 305 + { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; } } -#line 287 +#line 309 - break; - } - } stop: - scdebug(("stopped in state %d at pos %d (%d:%c)\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR)); - rest = YYLIMIT - start; - + scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest)); /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */ if (rest < 0) rest = 0; if (rest) memmove(ctx->buf.c, start, rest); ctx->buf.len = rest; } -/* }}} */ -/* {{{ url_adapt_single_url - */ +char *url_adapt_flush(size_t *newlen) +{ + char *ret = NULL; + url_adapt_state_ex_t *ctx; + BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); + + if (ctx->buf.len) { + ret = ctx->buf.c; + *newlen = ctx->buf.len; + ctx->buf.c = 0; + ctx->buf.len = 0; + } + + return ret; +} + char *url_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen) { smart_str surl = {0}; @@ -688,10 +839,7 @@ char *url_adapt_single_url(const char *url, size_t urllen, const char *name, con return buf.c; } -/* }}} */ -/* {{{ url_adapt_ext - */ char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char *value, size_t *newlen) { char *ret; @@ -705,11 +853,12 @@ char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char mainloop(ctx, src, srclen); *newlen = ctx->result.len; + if (!ctx->result.c) + smart_str_appendl(&ctx->result, "", 0); smart_str_0(&ctx->result); ctx->result.len = 0; return ctx->result.c; } -/* }}} */ PHP_RINIT_FUNCTION(url_scanner) { @@ -762,12 +911,3 @@ PHP_MSHUTDOWN_FUNCTION(url_scanner) } #endif - -/* - * Local variables: - * tab-width: 4 - * c-basic-offset: 4 - * End: - * vim600: sw=4 ts=4 tw=78 fdm=marker - * vim<600: sw=4 ts=4 tw=78 - */ diff --git a/ext/standard/url_scanner_ex.h b/ext/standard/url_scanner_ex.h index 5ebd9bfe5d..a76bd66f45 100644 --- a/ext/standard/url_scanner_ex.h +++ b/ext/standard/url_scanner_ex.h @@ -28,6 +28,8 @@ char *url_adapt_ext_ex(const char *src, size_t srclen, const char *name, const c char *url_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen); +char *url_adapt_flush(size_t *); + #include "php_smart_str_public.h" typedef struct { diff --git a/ext/standard/url_scanner_ex.re b/ext/standard/url_scanner_ex.re index 681f08c2c0..0d95d68ae5 100644 --- a/ext/standard/url_scanner_ex.re +++ b/ext/standard/url_scanner_ex.re @@ -87,30 +87,37 @@ PHP_INI_BEGIN() STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=fakeentry", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals) PHP_INI_END() +/*!re2c +any = [\000-\377]; +N = (any\[<]); +alpha = [a-zA-Z]; +*/ + +#define YYFILL(n) goto done +#define YYCTYPE unsigned char +#define YYCURSOR p +#define YYLIMIT q +#define YYMARKER r + static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *name, smart_str *val, const char *separator) { - register const char *p, *q; + register const char *p, *q, *r; const char *bash = NULL; const char *sep = "?"; - q = url->c + url->len; - - for (p = url->c; p < q; p++) { - switch(*p) { - case ':': - smart_str_append(dest, url); - return; - case '?': - sep = separator; - break; - case '#': - bash = p; - break; - } - } + q = (p = url->c) + url->len; +scan: +/*!re2c + ":" { smart_str_append(dest, url); return; } + "?" { sep = separator; goto done; } + "#" { bash = p; goto done; } + (any\[:?#])+ { goto scan; } +*/ +done: + /* Don't modify URLs of the format "#mark" */ - if (bash - url->c == 0) { + if (bash && bash - url->c == 0) { smart_str_append(dest, url); return; } @@ -129,6 +136,12 @@ static inline void append_modified_url(smart_str *url, smart_str *dest, smart_st smart_str_appendl(dest, bash, q - bash); } +#undef YYFILL +#undef YYCTYPE +#undef YYCURSOR +#undef YYLIMIT +#undef YYMARKER + static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC) { char f = 0; @@ -146,7 +159,7 @@ static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC) } enum { - STATE_PLAIN, + STATE_PLAIN = 0, STATE_TAG, STATE_NEXT_ARG, STATE_ARG, @@ -232,68 +245,72 @@ static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size YYCURSOR = ctx->buf.c; YYLIMIT = ctx->buf.c + ctx->buf.len; -/*!re2c -any = [\000-\377]; -alpha = [a-zA-Z]; -*/ + switch (STATE) { + case STATE_PLAIN: goto state_plain; + case STATE_TAG: goto state_tag; + case STATE_NEXT_ARG: goto state_next_arg; + case STATE_ARG: goto state_arg; + case STATE_BEFORE_VAL: goto state_before_val; + case STATE_VAL: goto state_val; + } + + +state_plain_begin: + STATE = STATE_PLAIN; - while(1) { - start = YYCURSOR; - scdebug(("state %d at %s\n", STATE, YYCURSOR)); - switch(STATE) { - - case STATE_PLAIN: +state_plain: + start = YYCURSOR; /*!re2c - [<] { passthru(STD_ARGS); STATE = STATE_TAG; continue; } - (any\[<]) { passthru(STD_ARGS); continue; } + "<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; } + N+ { passthru(STD_ARGS); goto state_plain; } */ - break; - - case STATE_TAG: + +state_tag: + start = YYCURSOR; /*!re2c - alpha+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); continue; } - any { passthru(STD_ARGS); STATE = STATE_PLAIN; continue; } + alpha+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; } + any { passthru(STD_ARGS); goto state_plain_begin; } */ - break; - - case STATE_NEXT_ARG: + +state_next_arg_begin: + STATE = STATE_NEXT_ARG; + +state_next_arg: + start = YYCURSOR; /*!re2c - ">" { passthru(STD_ARGS); handle_form(STD_ARGS); STATE = STATE_PLAIN; continue; } - [ \n] { passthru(STD_ARGS); continue; } - alpha { YYCURSOR--; STATE = STATE_ARG; continue; } - any { passthru(STD_ARGS); STATE = STATE_PLAIN; continue; } + ">" { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; } + [ \v\t\n]+ { passthru(STD_ARGS); goto state_next_arg; } + alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; } + any { passthru(STD_ARGS); goto state_plain_begin; } */ - break; - case STATE_ARG: +state_arg: + start = YYCURSOR; /*!re2c - alpha+ { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; continue; } - any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; } + alpha+ { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; } + any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; } */ - case STATE_BEFORE_VAL: +state_before_val: + start = YYCURSOR; /*!re2c - [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; continue; } - any { YYCURSOR--; STATE = STATE_NEXT_ARG; continue; } + [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; } + any { --YYCURSOR; goto state_next_arg_begin; } */ - break; - case STATE_VAL: + +state_val: + start = YYCURSOR; /*!re2c - ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); STATE = STATE_NEXT_ARG; continue; } - ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); STATE = STATE_NEXT_ARG; continue; } - (any\[ \n>"])+ { handle_val(STD_ARGS, 0, '"'); STATE = STATE_NEXT_ARG; continue; } - any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; } + ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; } + ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; } + (any\[ \n>"'])+ { handle_val(STD_ARGS, 0, '"'); goto state_next_arg_begin; } + any { passthru(STD_ARGS); goto state_next_arg_begin; } */ - break; - } - } stop: - scdebug(("stopped in state %d at pos %d (%d:%c)\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR)); - rest = YYLIMIT - start; - + scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest)); /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */ if (rest < 0) rest = 0; @@ -301,6 +318,24 @@ stop: ctx->buf.len = rest; } +char *url_adapt_flush(size_t *newlen) +{ + char *ret = NULL; + url_adapt_state_ex_t *ctx; + BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); + + if (ctx->buf.len) { + ret = ctx->buf.c; + *newlen = ctx->buf.len; + ctx->buf.c = 0; + ctx->buf.len = 0; + } + + return ret; +} + char *url_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen) { smart_str surl = {0}; @@ -334,6 +369,8 @@ char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char mainloop(ctx, src, srclen); *newlen = ctx->result.len; + if (!ctx->result.c) + smart_str_appendl(&ctx->result, "", 0); smart_str_0(&ctx->result); ctx->result.len = 0; return ctx->result.c; diff --git a/main/output.c b/main/output.c index c29ef41900..1509cb277b 100644 --- a/main/output.c +++ b/main/output.c @@ -254,10 +254,15 @@ PHPAPI void php_end_ob_buffer(zend_bool send_buffer, zend_bool just_flush) PHPAPI void php_end_ob_buffers(zend_bool send_buffer) { OLS_FETCH(); + BLS_FETCH(); while (OG(nesting_level)!=0) { php_end_ob_buffer(send_buffer, 0); } + + if (send_buffer && BG(use_trans_sid)) { + session_adapt_flush(OG(php_header_write)); + } } /* }}} */ -- 2.40.0