-/* Generated by re2c 0.5 on Sat Nov 27 16:22:34 1999 */
-#line 1 "../../../php4/ext/standard/url_scanner.re"
/*
+----------------------------------------------------------------------+
| PHP version 4.0 |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
- | Author: Sascha Schumann <sascha@schumann.cx> |
+ | Author: Sascha Schumann <sascha@schumann.cx> |
+ | Hartmut Holzgraefe <hartmut@six.de> |
+----------------------------------------------------------------------+
*/
/* $Id$ */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include "php.h"
+#include "basic_functions.h"
+#include "url_scanner.h"
-#undef MIN
-#define MIN(a,b) (a)<(b)?(a):(b)
-
-#define YYCTYPE char
-#define YYCURSOR state->crs
-#define YYLIMIT state->end
-#define YYMARKER state->ptr
-#define YYFILL(n)
-
-typedef enum {
- INITIAL,
- REF
-} state;
-
-typedef struct {
- state state;
- const char *crs;
- const char *end;
- const char *ptr;
- const char *start;
- char *target;
- size_t targetsize;
- const char *data;
-} lexdata;
-
-#define FINISH { catchup(state); goto finish; }
+#define BUFSIZE 256 /* initial size and growth step (bytes) of the tag/attr/val scratch buffers */
-#define BEGIN(x) \
- switch(state->state) { \
- case INITIAL: \
- catchup(state); \
- break; \
- case REF: \
- screw_url(state); \
- break; \
- } \
- state->state = x; \
- state->start = state->crs; \
- goto nextiter
+/*
+ * Per-request startup hook: puts the URL scanner back into its initial
+ * state.  Calling url_adapt() with a NULL source resets the state machine
+ * and releases any tag/attribute/value buffers it still holds.
+ */
+PHP_RINIT_FUNCTION(url_scanner) {
+	url_adapt(NULL,0,NULL,NULL);
+	/* the hook returns int; report success explicitly instead of falling off the end */
+	return SUCCESS;
+}
-#define ATTACH(s, n) \
-{ \
- size_t _newlen = state->targetsize + n; \
- state->target = realloc(state->target, _newlen + 1); \
- memcpy(state->target + state->targetsize, s, n); \
- state->targetsize = _newlen; \
- state->target[_newlen] = '\0'; \
+/*
+ * Per-request shutdown hook: resets the URL scanner and frees the
+ * tag/attribute/value buffers by calling url_adapt() with a NULL source.
+ */
+PHP_RSHUTDOWN_FUNCTION(url_scanner) {
+	url_adapt(NULL,0,NULL,NULL);
+	/* the hook returns int; report success explicitly instead of falling off the end */
+	return SUCCESS;
}
-
-#define URLLEN 512
-
-static void screw_url(lexdata *state)
-{
- int len;
- char buf[URLLEN];
- char url[URLLEN];
- const char *p, *q;
- char c;
- /* search outer limits for URI */
- for(p = state->start; p < state->crs && (c = *p); p++)
- if(!isspace(c)) break;
- if(c=='"') p++;
- for(; p < state->crs && (c = *p); p++)
- if(!isspace(c)) break;
+/*
+ * Decide whether attribute `attr` of element `tag` carries a URL that should
+ * have `buf` appended, and build the text to append.
+ *
+ * tag, attr - NUL-terminated element and attribute names (compared without
+ *             regard to case)
+ * val       - the attribute value as scanned so far
+ * buf       - the text to add to the URL
+ *
+ * Returns an emalloc()ed string consisting of a separator followed by `buf`;
+ * the separator is '&' when `val` already contains a '?', otherwise '?'.
+ * Returns NULL when the tag/attribute pair is not one of the recognised URL
+ * attributes or when `val` already contains `buf`.  The caller owns the
+ * returned string and releases it with efree().
+ */
+static char *url_attr_addon(const char *tag,const char *attr,const char *val,const char *buf) {
+	int flag = 0;
- /*
- * we look at q-1, because q points to the character behind the last
- * character we are going to copy and the decision is based on that last
- * character
- */
+	if(!strcasecmp(tag,"a")&&!strcasecmp(attr,"href")) {
+		flag = 1;
+	} else if(!strcasecmp(tag,"area")&&!strcasecmp(attr,"href")) {
+		flag = 1;
+	} else if(!strcasecmp(tag,"form")&&!strcasecmp(attr,"action")) {
+		flag = 1;
+	} else if(!strcasecmp(tag,"frame")&&(!strcasecmp(attr,"src")||!strcasecmp(attr,"source"))) {
+		/* the URL attribute of <frame> is "src"; "source" is still accepted for compatibility */
+		flag = 1;
+	} else if(!strcasecmp(tag,"img")&&!strcasecmp(attr,"src")) {
+		/* <img> has no "action" attribute; its URL lives in "src" */
+		flag = 1;
+	}
+	if(flag) {
+		/* leave the URL alone when it already carries the addon */
+		if(!strstr(val,buf))
+		{
+			/* one byte for the separator, one for the terminator */
+			char *p = (char *)emalloc(strlen(buf)+2);
+			*p=strchr(val,'?')?'&':'?';
+			strcpy(p+1,buf);
+			return p;
+		}
+	}
+	return NULL;
+}
- for(q = state->crs; q > state->start && (c = *(q-1)); q--)
- if(!isspace(c)) break;
- if(c=='"') q--;
- for(; q > state->start && (c = *(q-1)); q--)
- if(!isspace(c)) break;
+#define US BG(url_adapt_state) /* shorthand for the scanner state (state, tag, attr, val, p, l, ml, delim) reached through BG() */
- if(q<p) { p=state->start; q=state->crs; }
+/*
+ * Copy an HTML fragment while appending `data` to the URL attributes
+ * recognised by url_attr_addon().
+ *
+ * src    - input text; passing NULL resets the scanner state, frees the
+ *          tag/attribute/value buffers and returns NULL
+ * srclen - number of bytes to process; 0 means strlen(src)
+ * data   - text to append to matching URLs
+ * newlen - receives the length of the returned text; may be NULL when the
+ *          caller does not need it
+ *
+ * Returns a NUL-terminated buffer obtained from malloc(), or NULL when the
+ * buffer cannot be allocated.  The scanner state lives in US and is carried
+ * over from one call to the next until it is reset.
+ */
+char *url_adapt(const char *src, size_t srclen, const char *data, size_t *newlen)
+{
+	char *out,*outp;
+	int maxl,n,no_output;
+	size_t ignored_len;	/* stands in for *newlen when the caller passes NULL */
- /* attach beginning */
- ATTACH(state->start, p-state->start);
-
- /* copy old URI */
- len = MIN(q - p, sizeof(buf) - 1);
+	if(src==NULL) {
+		/* reset request: back to the initial state, drop all scratch buffers */
+		US.state=STATE_NORMAL;
+		if(US.tag)  {efree(US.tag);  US.tag =NULL; }
+		if(US.attr) {efree(US.attr); US.attr=NULL; }
+		if(US.val)  {efree(US.val);  US.val =NULL; }
+		return NULL;
+	}
- memcpy(url, p, len);
- url[len] = '\0';
-
- /* construct new URI */
- len = snprintf(buf, sizeof(buf), "%s%c%s", url,
- memchr(state->start, '?', len) ? '&' : '?',
- state->data);
+	if(srclen==0)
+		srclen=strlen(src);
+
+	/* the length is updated through this pointer throughout the scan,
+	 * so make sure it always points at something writable */
+	if(newlen==NULL)
+		newlen=&ignored_len;
- /* attach new URI */
- ATTACH(buf, len);
-
- /* attach rest */
- ATTACH(q, state->crs - q);
-}
+	/* room for an unmodified copy of the input plus the terminator */
+	out=malloc(srclen+1);
+	if(out==NULL) {
+		*newlen=0;
+		return NULL;
+	}
+	maxl=srclen;
+	n=srclen;
+	no_output=0;
-static void catchup(lexdata *state)
-{
- ATTACH(state->start, (state->crs - state->start));
-}
+	*newlen=0;
+	outp=out;
-#line 144
+	/* feed the input through the state machine one byte at a time */
+	while(n--) {
+		switch(US.state) {
+			case STATE_NORMAL:
+				/* plain text: only a '<' is of interest */
+				if(*src=='<')
+					US.state=STATE_TAG_START;
+				break;
+			case STATE_TAG_START:
+				if(! isalnum((unsigned char)*src)) {
+					/* not the start of an element name: fall back to plain
+					 * text, unless this byte is itself a new '<' */
+					US.state=(*src=='<')?STATE_TAG_START:STATE_NORMAL;
+					break;
+				}
+				/* first character of the element name: start a fresh buffer */
+				US.state=STATE_TAG;
+				US.ml=BUFSIZE;
+				US.p=US.tag=erealloc(US.tag,US.ml);
+				*(US.p)++=*src;
+				US.l=1;
+				break;
-static void url_scanner(lexdata *state)
-{
- while(state->crs < state->end) {
-
- switch(state->state) {
- case INITIAL:
-{
- YYCTYPE yych;
- unsigned int yyaccept;
- goto yy0;
-yy1: ++YYCURSOR;
-yy0:
- if((YYLIMIT - YYCURSOR) < 7) YYFILL(7);
- yych = *YYCURSOR;
- switch(yych){
- case '\000': goto yy7;
- case '<': goto yy2;
- default: goto yy4;
- }
-yy2: yych = *++YYCURSOR;
- switch(yych){
- case 'A': case 'a': goto yy9;
- case 'F': case 'f': goto yy10;
- default: goto yy3;
- }
-yy3:yy4: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy5: switch(yych){
- case '\000': case '<': goto yy6;
- default: goto yy4;
- }
-yy6:
-#line 157
- { BEGIN(INITIAL); }
-yy7: yych = *++YYCURSOR;
-yy8:
-#line 158
- { FINISH; }
-yy9: yych = *++YYCURSOR;
- switch(yych){
- case 'H': case 'h': goto yy3;
- case 'R': case 'r': goto yy41;
- default: goto yy40;
- }
-yy10: yych = *++YYCURSOR;
- switch(yych){
- case 'O': case 'o': goto yy12;
- case 'R': case 'r': goto yy11;
- default: goto yy3;
- }
-yy11: yych = *++YYCURSOR;
- switch(yych){
- case 'A': case 'a': goto yy27;
- default: goto yy3;
- }
-yy12: yych = *++YYCURSOR;
- switch(yych){
- case 'R': case 'r': goto yy13;
- default: goto yy3;
- }
-yy13: yych = *++YYCURSOR;
- switch(yych){
- case 'M': case 'm': goto yy14;
- default: goto yy3;
- }
-yy14: yych = *++YYCURSOR;
- switch(yych){
- case 'A': case 'a': goto yy3;
- default: goto yy16;
- }
-yy15: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy16: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy15;
- case 'A': case 'a': goto yy17;
- default: goto yy3;
- }
-yy17: yych = *++YYCURSOR;
- switch(yych){
- case 'C': case 'c': goto yy18;
- default: goto yy3;
- }
-yy18: yych = *++YYCURSOR;
- switch(yych){
- case 'T': case 't': goto yy19;
- default: goto yy3;
- }
-yy19: yych = *++YYCURSOR;
- switch(yych){
- case 'I': case 'i': goto yy20;
- default: goto yy3;
- }
-yy20: yych = *++YYCURSOR;
- switch(yych){
- case 'O': case 'o': goto yy21;
- default: goto yy3;
- }
-yy21: yych = *++YYCURSOR;
- switch(yych){
- case 'N': case 'n': goto yy22;
- default: goto yy3;
- }
-yy22: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy23: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy22;
- case '=': goto yy24;
- default: goto yy3;
- }
-yy24: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy25: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy24;
- default: goto yy26;
- }
-yy26:
-#line 155
- { BEGIN(REF); }
-yy27: yych = *++YYCURSOR;
- switch(yych){
- case 'M': case 'm': goto yy28;
- default: goto yy3;
- }
-yy28: yych = *++YYCURSOR;
- switch(yych){
- case 'E': case 'e': goto yy29;
- default: goto yy3;
- }
-yy29: yych = *++YYCURSOR;
- switch(yych){
- case 'S': case 's': goto yy3;
- default: goto yy31;
- }
-yy30: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy31: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy30;
- case 'S': case 's': goto yy32;
- default: goto yy3;
- }
-yy32: yych = *++YYCURSOR;
- switch(yych){
- case 'R': case 'r': goto yy33;
- default: goto yy3;
- }
-yy33: yych = *++YYCURSOR;
- switch(yych){
- case 'C': case 'c': goto yy34;
- default: goto yy3;
- }
-yy34: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy35: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy34;
- case '=': goto yy36;
- default: goto yy3;
- }
-yy36: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy37: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy36;
- default: goto yy38;
- }
-yy38:
-#line 153
- { BEGIN(REF); }
-yy39: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy40: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy39;
- case 'H': case 'h': goto yy54;
- default: goto yy3;
- }
-yy41: yych = *++YYCURSOR;
- switch(yych){
- case 'E': case 'e': goto yy42;
- default: goto yy3;
- }
-yy42: yych = *++YYCURSOR;
- switch(yych){
- case 'A': case 'a': goto yy43;
- default: goto yy3;
- }
-yy43: yych = *++YYCURSOR;
- switch(yych){
- case 'H': case 'h': goto yy3;
- default: goto yy45;
- }
-yy44: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy45: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy44;
- case 'H': case 'h': goto yy46;
- default: goto yy3;
- }
-yy46: yych = *++YYCURSOR;
- switch(yych){
- case 'R': case 'r': goto yy47;
- default: goto yy3;
- }
-yy47: yych = *++YYCURSOR;
- switch(yych){
- case 'E': case 'e': goto yy48;
- default: goto yy3;
- }
-yy48: yych = *++YYCURSOR;
- switch(yych){
- case 'F': case 'f': goto yy49;
- default: goto yy3;
- }
-yy49: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy50: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy49;
- case '=': goto yy51;
- default: goto yy3;
- }
-yy51: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy52: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy51;
- default: goto yy53;
- }
-yy53:
-#line 156
- { BEGIN(REF); }
-yy54: yych = *++YYCURSOR;
- switch(yych){
- case 'R': case 'r': goto yy55;
- default: goto yy3;
- }
-yy55: yych = *++YYCURSOR;
- switch(yych){
- case 'E': case 'e': goto yy56;
- default: goto yy3;
- }
-yy56: yych = *++YYCURSOR;
- switch(yych){
- case 'F': case 'f': goto yy57;
- default: goto yy3;
- }
-yy57: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy58: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy57;
- case '=': goto yy59;
- default: goto yy3;
- }
-yy59: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy60: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy59;
- default: goto yy61;
- }
-yy61:
-#line 154
- { BEGIN(REF); }
-}
-#line 159
+			case STATE_TAG:
+				/* collecting the element name */
+				if(isalnum((unsigned char)*src)) {
+					*(US.p)++ = *src;
+					US.l++;
+					/* grow early so there is always a spare byte for the terminator */
+					if(US.l==US.ml) {
+						US.ml+=BUFSIZE;
+						US.tag=erealloc(US.tag,US.ml);
+						US.p = US.tag+US.l;
+					}
+				} else if (isspace((unsigned char)*src)) {
+					US.state = STATE_IN_TAG;
+					*US.p='\0';
+					/* trim to fit: l name bytes plus the terminator just written */
+					US.tag=erealloc(US.tag,US.l+1);
+				} else {
+					/* anything else ends the element without attributes */
+					US.state = STATE_NORMAL;
+					efree(US.tag);
+					US.tag=NULL;
+				}
+				break;
+			case STATE_IN_TAG:
+				/* between attributes: wait for the next attribute name */
+				if(isalnum((unsigned char)*src)) {
+					US.state=STATE_TAG_ATTR;
+					US.ml=BUFSIZE;
+					US.p=US.attr=erealloc(US.attr,US.ml);
+					*(US.p)++=*src;
+					US.l=1;
+				} else if (! isspace((unsigned char)*src)) {
+					/* neither a name nor whitespace: leave the element */
+					US.state = STATE_NORMAL;
+					efree(US.tag);
+					US.tag=NULL;
+				}
break;
- case REF:
-{
- YYCTYPE yych;
- unsigned int yyaccept;
- goto yy62;
-yy63: ++YYCURSOR;
-yy62:
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
- switch(yych){
- case '\000': case '>': goto yy64;
- case '\t': case '\v':
- case '\f': case ' ': case '"': goto yy65;
- case '#': goto yy69;
- case ':': goto yy71;
- default: goto yy67;
- }
-yy64:
-#line 163
- { BEGIN(INITIAL); }
-yy65: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy66: switch(yych){
- case '\000': case '>': goto yy64;
- case '\t': case '\v':
- case '\f': case ' ': goto yy65;
- case '"': goto yy79;
- case '#': goto yy69;
- case ':': goto yy71;
- default: goto yy67;
- }
-yy67: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy68: switch(yych){
- case '\000': case '>': goto yy64;
- case '\t': case '\v':
- case '\f': case ' ': goto yy77;
- case '"': goto yy79;
- case '#': goto yy69;
- case ':': goto yy71;
- default: goto yy67;
- }
-yy69: yych = *++YYCURSOR;
-yy70: YYCURSOR -= 1;
-#line 164
- { BEGIN(INITIAL); }
-yy71: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy72: switch(yych){
- case '\000': case '#': case '>': goto yy73;
- case '\t': case '\v':
- case '\f': case ' ': goto yy74;
- case '"': goto yy76;
- default: goto yy71;
- }
-yy73:
-#line 165
- {
- /* don't modify absolute links */
- state->state = INITIAL; BEGIN(INITIAL);
- }
-yy74: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy75: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy74;
- case '"': goto yy76;
- default: goto yy73;
- }
-yy76: yych = *++YYCURSOR;
- goto yy73;
-yy77: ++YYCURSOR;
- if(YYLIMIT == YYCURSOR) YYFILL(1);
- yych = *YYCURSOR;
-yy78: switch(yych){
- case '\t': case '\v':
- case '\f': case ' ': goto yy77;
- case '"': goto yy79;
- default: goto yy64;
- }
-yy79: yych = *++YYCURSOR;
- goto yy64;
-}
-#line 169
- break;
- }
-nextiter:
- ;
- }
-finish:
- ;
-}
+			case STATE_TAG_ATTR:
+				/* collecting an attribute name */
+				if(isalnum((unsigned char)*src)) {
+					*US.p++=*src;
+					++US.l;
+					/* grow early so there is always a spare byte for the terminator */
+					if(US.l==US.ml) {
+						US.ml+=BUFSIZE;
+						US.attr=erealloc(US.attr,US.ml);
+						US.p = US.attr+US.l;
+					}
+				} else if(isspace((unsigned char)*src)||(*src=='=')){
+					US.state=STATE_TAG_IS;
+					*US.p=0;
+					/* trim to fit: l name bytes plus the terminator just written */
+					US.attr=erealloc(US.attr,US.l+1);
+				} else if(*src=='>') {
+					US.state=STATE_NORMAL;
+				} else {
+					/* not a plain attribute name: discard it and look for the next one */
+					efree(US.attr);
+					US.attr=NULL;
+					US.state=STATE_IN_TAG;
+				}
+				break;
+
+			case STATE_TAG_IS:
+			case STATE_TAG_IS2:
+				/* the first non-blank byte starts the attribute value */
+				if(!isspace((unsigned char)*src)) {
+					US.ml=BUFSIZE;
+					US.p=US.val=erealloc(US.val,US.ml);
+					US.l=0;
+					if((*src=='"')||(*src=='\'')) {
+						/* quoted value: remember which quote closes it */
+						US.state=STATE_TAG_QVAL2;
+						US.delim=*src;
+					} else {
+						US.state=STATE_TAG_VAL;
+						*US.p++=*src;
+						US.l++;
+					}
+				}
+				break;
-char *url_adapt(const char *src, size_t srclen, const char *data, size_t *newlen)
-{
- lexdata state;
- state.state = INITIAL;
- state.start = state.crs = src;
- state.end = src + srclen;
- state.ptr = NULL;
- state.target = NULL;
- state.targetsize = 0;
- state.data = data;
+			case STATE_TAG_QVAL2:
+				/* inside a quoted attribute value */
+				if(*src==US.delim) {
+					char *p;
+					US.state=STATE_IN_TAG;
+					*US.p='\0';
+					p=url_attr_addon(US.tag,US.attr,US.val,data);
+					if(p) {
+						int l= strlen(p);
+						maxl+=l;
+						/* maxl counts payload bytes only; keep one more byte
+						 * for the terminator written after the loop */
+						out=realloc(out,maxl+1);
+						outp=out+*newlen;
+						strcpy(outp,p);
+						outp+=l;
+						*newlen+=l;
+						efree(p);
+					}
+					/* the closing quote itself is copied by the code after the switch */
+					break;
+				} else if(*src=='\\') {
+					/* the backslash is recorded in the value but not copied to the output */
+					no_output=1;
+					US.state=STATE_TAG_QVAL2b;
+				} else if (*src=='>') {
+					US.state=STATE_NORMAL;
+				}
+
+				*US.p++=*src;
+				++US.l;
+				if(US.l==US.ml) {
+					US.ml+=BUFSIZE;
+					US.val=erealloc(US.val,US.ml);
+					US.p = US.val+US.l;
+				}
+
+				break;
+
+			case STATE_TAG_QVAL2b:
+				/* byte following a backslash: taken literally, then back to the quoted value */
+				US.state=STATE_TAG_QVAL2;
+				*US.p++=*src;
+				++US.l;
+				if(US.l==US.ml) {
+					US.ml+=BUFSIZE;
+					US.val=erealloc(US.val,US.ml);
+					US.p = US.val+US.l;
+				}
+				break;
- url_scanner(&state);
+			case STATE_TAG_VAL:
+				/* second byte of an unquoted value; blanks are ignored here */
+				/* cast: <ctype.h> functions are only defined for unsigned char values and EOF */
+				if(!isspace((unsigned char)*src)) {
+					if((*src=='"')||(*src=='\'')) {
+						/* a quote at this point switches to quoted-value handling */
+						US.state=STATE_TAG_QVAL2;
+						US.delim=*src;
+					} else {
+						*US.p++=*src;
+						US.l++;
+						if(US.l==US.ml) {
+							US.ml+=BUFSIZE;
+							US.val=erealloc(US.val,US.ml);
+							US.p = US.val+US.l;
+						}
+						US.state=STATE_TAG_VAL2;
+					}
+				}
+				break;
- if(newlen) *newlen = state.targetsize;
+			case STATE_TAG_VAL2:
+				/* remainder of an unquoted value: whitespace or '>' ends it */
+				if(isspace((unsigned char)*src)||(*src=='>')) {
+					char *p;
+					US.state=(*src=='>')?STATE_NORMAL:STATE_IN_TAG;
+					*US.p='\0';
+					p=url_attr_addon(US.tag,US.attr,US.val,data);
+					if(p) {
+						int l= strlen(p);
+						maxl+=l;
+						/* maxl counts payload bytes only; keep one more byte
+						 * for the terminator written after the loop */
+						out=realloc(out,maxl+1);
+						outp=out+*newlen;
+						strcpy(outp,p);
+						outp+=l;
+						*newlen+=l;
+						efree(p);
+					}
+				} else {
+					*US.p++=*src;
+					US.l++;
+					if(US.l==US.ml) {
+						US.ml+=BUFSIZE;
+						US.val=erealloc(US.val,US.ml);
+						US.p = US.val+US.l;
+					}
+				}
+				break;
+		}
- return state.target;
+		/* copy the current byte to the output unless the state machine
+		 * asked for it to be dropped; either way move on to the next byte */
+		if(!no_output) {
+			*outp++=*src;
+			++*newlen;
+		} else {
+			no_output=0;
+		}
+		src++;
+	}
+	/* terminate the result and hand it to the caller */
+	*outp='\0';
+	return out;
}
#endif