granicus.if.org Git - php/commitdiff
trans-sid rewrite, is now state-aware (fix for bug #3411 and friends)
author Hartmut Holzgraefe <hholzgra@php.net>
Tue, 1 Aug 2000 07:45:14 +0000 (07:45 +0000)
committer Hartmut Holzgraefe <hholzgra@php.net>
Tue, 1 Aug 2000 07:45:14 +0000 (07:45 +0000)
ext/standard/Makefile.in
ext/standard/basic_functions.c
ext/standard/basic_functions.h
ext/standard/url_scanner.c
ext/standard/url_scanner.h

index 8af72fa31752ad335e95611119a79283fd82b784..38501f0d5f63fd8d4cff2e9a3d093ab3ca71405a 100644 (file)
@@ -13,5 +13,3 @@ include $(top_srcdir)/build/dynlib.mk
 
 parsedate.c: $(srcdir)/parsedate.y
 
-$(srcdir)/url_scanner.c: $(srcdir)/url_scanner.re
-       -re2c $< > $@.new && mv $@.new $@
index 53acc21c977596cdab2d9a4a0f86877898baf226..c5d1ba0799ca984666c9c13e96c2496275ec6f36 100644 (file)
@@ -729,6 +729,10 @@ PHP_RINIT_FUNCTION(basic)
        PHP_RINIT(assert)(INIT_FUNC_ARGS_PASSTHRU);
        PHP_RINIT(dir)(INIT_FUNC_ARGS_PASSTHRU);
 
+#ifdef TRANS_SID
+       PHP_RINIT(url_scanner)(INIT_FUNC_ARGS_PASSTHRU);
+#endif
+       
        return SUCCESS;
 }
 
@@ -754,6 +758,10 @@ PHP_RSHUTDOWN_FUNCTION(basic)
        PHP_RSHUTDOWN(syslog)(SHUTDOWN_FUNC_ARGS_PASSTHRU);
        PHP_RSHUTDOWN(assert)(SHUTDOWN_FUNC_ARGS_PASSTHRU);
 
+#ifdef TRANS_SID
+       /* a shutdown hook takes the shutdown argument list, like the calls above */
+       PHP_RSHUTDOWN(url_scanner)(SHUTDOWN_FUNC_ARGS_PASSTHRU);
+#endif
+
        return SUCCESS;
 }
 
index 5f41801439dff9c93cab9a109471323403ddff11..32d90c9aaf49b941882ec9c0daaffe41e371d399 100644 (file)
 
 #include "zend_highlight.h"
 
+#ifdef TRANS_SID
+#  include "url_scanner.h"
+#endif
+
 extern zend_module_entry basic_functions_module;
 #define basic_functions_module_ptr &basic_functions_module
 
@@ -161,6 +165,11 @@ typedef struct {
 
        /* var.c */
        zend_class_entry *incomplete_class;
+
+#ifdef TRANS_SID
+       /* url_scanner.c */
+       url_adapt_state_t url_adapt_state; 
+#endif
 } php_basic_globals;
 
 #ifdef ZTS
index eed19f4db9fbc0462fce55b5ecfc44dc27642020..8a1c72c6726fadfb42ec5020ea5a09b80ddc6a2d 100644 (file)
@@ -1,5 +1,3 @@
-/* Generated by re2c 0.5 on Sat Nov 27 16:22:34 1999 */
-#line 1 "../../../php4/ext/standard/url_scanner.re"
 /*
    +----------------------------------------------------------------------+
    | PHP version 4.0                                                      |
@@ -14,7 +12,8 @@
    | obtain it through the world-wide-web, please send a note to          |
    | license@php.net so we can mail you a copy immediately.               |
    +----------------------------------------------------------------------+
-   | Author: Sascha Schumann <sascha@schumann.cx>                         |
+   | Author: Sascha Schumann    <sascha@schumann.cx>                      |
+   |         Hartmut Holzgraefe <hartmut@six.de>                          |
    +----------------------------------------------------------------------+
  */
 /* $Id$ */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "php.h"
+#include "basic_functions.h"
+#include "url_scanner.h"
 
-#undef MIN
-#define MIN(a,b) (a)<(b)?(a):(b)
-
-#define YYCTYPE char
-#define YYCURSOR state->crs
-#define YYLIMIT state->end
-#define YYMARKER state->ptr
-#define YYFILL(n)
-
-typedef enum {
-       INITIAL,
-       REF
-} state;
-
-typedef struct {
-       state state;
-       const char *crs;
-       const char *end;
-       const char *ptr;
-       const char *start;
-       char *target;
-       size_t targetsize;
-       const char *data;
-} lexdata;
-
-#define FINISH { catchup(state); goto finish; }
+#define BUFSIZE 256
 
-#define BEGIN(x)                                               \
-               switch(state->state) {                  \
-                       case INITIAL:                           \
-                               catchup(state);                 \
-                               break;                                  \
-                       case REF:                                       \
-                               screw_url(state);               \
-                               break;                                  \
-               }                                                               \
-               state->state = x;                               \
-               state->start = state->crs;              \
-               goto nextiter
+/*
+ * Request startup hook: calling url_adapt() with a NULL source resets the
+ * scanner state to STATE_NORMAL and releases any tag/attr/val buffers.
+ * Returns SUCCESS.
+ */
+PHP_RINIT_FUNCTION(url_scanner) {
+       url_adapt(NULL,0,NULL,NULL);
+       return SUCCESS;
+}
 
-#define ATTACH(s, n)                                                                           \
-{                                                                                                                      \
-       size_t _newlen = state->targetsize + n;                                 \
-       state->target = realloc(state->target, _newlen + 1);    \
-       memcpy(state->target + state->targetsize, s, n);                \
-       state->targetsize = _newlen;                                                    \
-       state->target[_newlen] = '\0';                                                  \
+/*
+ * Request shutdown hook: calling url_adapt() with a NULL source resets the
+ * scanner state and frees the tag/attr/val buffers still held.
+ * Returns SUCCESS.
+ */
+PHP_RSHUTDOWN_FUNCTION(url_scanner) {
+       url_adapt(NULL,0,NULL,NULL);
+       return SUCCESS;
 }
-       
-#define URLLEN 512
-       
-static void screw_url(lexdata *state)
-{
-       int len;
-       char buf[URLLEN];
-       char url[URLLEN];
-       const char *p, *q;
-       char c;
 
 
-       /* search outer limits for URI */
-       for(p = state->start; p < state->crs && (c = *p); p++)
-               if(!isspace(c)) break;
-       if(c=='"') p++;
-       for(; p < state->crs && (c = *p); p++)
-               if(!isspace(c)) break;
+static char *url_attr_addon(const char *tag,const char *attr,const char *val,const char *buf) {
+       int flag = 0;
 
-       /*  
-        *  we look at q-1, because q points to the character behind the last
-        *  character we are going to copy and the decision is based on that last
-        *  character 
-        */
+       /*
+        * Decide whether this tag/attribute pair holds a URL that should get
+        * the data in buf appended.  Returns an emalloc()ed string made of a
+        * separator ('?' or '&') followed by buf, which the caller must
+        * efree(), or NULL when nothing is to be appended.
+        */
+       if(!strcasecmp(tag,"a")&&!strcasecmp(attr,"href")) {
+               flag = 1;
+       } else if(!strcasecmp(tag,"area")&&!strcasecmp(attr,"href")) {
+               flag = 1;
+       } else if(!strcasecmp(tag,"form")&&!strcasecmp(attr,"action")) {
+               flag = 1;
+       } else if(!strcasecmp(tag,"frame")&&!strcasecmp(attr,"src")) {
+               /* the URL attribute of <frame> is "src" */
+               flag = 1;
+       } else if(!strcasecmp(tag,"img")&&!strcasecmp(attr,"action")) {
+               /* NOTE(review): <img> has no "action" attribute, so this never matches -- confirm whether "src" was intended */
+               flag = 1;
+       }
+       /*
+        * Leave the value alone when it already contains buf, or when it has
+        * a ':' (a scheme, i.e. an absolute link) so the data is not handed
+        * to other sites.
+        */
+       if(flag && !strstr(val,buf) && !strchr(val,':')) {
+               char *p = (char *)emalloc(strlen(buf)+2);
+               /* use '&' when the value already has a query part */
+               *p=strchr(val,'?')?'&':'?';
+               strcpy(p+1,buf);
+               return p;
+       }
+       return NULL;
+}
 
-       for(q = state->crs; q > state->start && (c = *(q-1)); q--)
-               if(!isspace(c)) break;
-       if(c=='"') q--;
-       for(; q > state->start && (c = *(q-1)); q--)
-               if(!isspace(c)) break;
+#define US BG(url_adapt_state)
 
-       if(q<p) { p=state->start; q=state->crs; }
+char *url_adapt(const char *src, size_t srclen, const char *data, size_t *newlen)
+{
+       char *out,*outp;
+       int maxl,n,no_output;
 
-       /* attach beginning */
-       ATTACH(state->start, p-state->start);
-       
-       /* copy old URI */
-       len = MIN(q - p, sizeof(buf) - 1);
+       if(src==NULL) {
+               US.state=STATE_NORMAL;
+               if(US.tag)  {efree(US.tag);  US.tag =NULL; }
+               if(US.attr) {efree(US.attr); US.attr=NULL; }
+               if(US.val)  {efree(US.val);  US.val =NULL; }
+               return NULL;
+       }
 
-       memcpy(url, p, len);
-       url[len] = '\0';
-       
-       /* construct new URI */
-       len = snprintf(buf, sizeof(buf), "%s%c%s", url,
-                       memchr(state->start, '?', len) ? '&' : '?',
-                       state->data);
+       if(srclen==0) 
+               srclen=strlen(src);
 
-       /* attach new URI */
-       ATTACH(buf, len);
-       
-       /* attach rest */
-       ATTACH(q, state->crs - q);
-}
+       /*
+        * maxl is the allocated size of out.  It has to include the byte for
+        * the terminating NUL, because the realloc() calls further down use
+        * maxl as the new size.
+        */
+       out=malloc(srclen+1);
+       maxl=srclen+1;
+       n=srclen;
+       no_output=0;
 
-static void catchup(lexdata *state) 
-{
-       ATTACH(state->start, (state->crs - state->start));
-}
+       *newlen=0;
+       outp=out;
 
-#line 144
+       while(n--) {
+               switch(US.state) {
+               case STATE_NORMAL:
+                       if(*src=='<') 
+                               US.state=STATE_TAG_START;
+                       break;
 
+               case STATE_TAG_START:
+                       /* character right after '<': only an alphanumeric starts a tag name */
+                       if(! isalnum((unsigned char)*src)) {
+                               /* not a tag; a second '<' may still open one */
+                               US.state=(*src=='<')?STATE_TAG_START:STATE_NORMAL;
+                               break;
+                       }
+                       US.state=STATE_TAG;
+                       US.ml=BUFSIZE;
+                       US.p=US.tag=erealloc(US.tag,US.ml);
+                       *(US.p)++=*src;
+                       US.l=1;
+                       break;
 
-static void url_scanner(lexdata *state)
-{
-       while(state->crs < state->end) {
-       
-       switch(state->state) {
-               case INITIAL: 
-{
-       YYCTYPE yych;
-       unsigned int yyaccept;
-       goto yy0;
-yy1:   ++YYCURSOR;
-yy0:
-       if((YYLIMIT - YYCURSOR) < 7) YYFILL(7);
-       yych = *YYCURSOR;
-       switch(yych){
-       case '\000':    goto yy7;
-       case '<':       goto yy2;
-       default:        goto yy4;
-       }
-yy2:   yych = *++YYCURSOR;
-       switch(yych){
-       case 'A':       case 'a':       goto yy9;
-       case 'F':       case 'f':       goto yy10;
-       default:        goto yy3;
-       }
-yy3:yy4:       ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy5:   switch(yych){
-       case '\000':    case '<':       goto yy6;
-       default:        goto yy4;
-       }
-yy6:
-#line 157
-       { BEGIN(INITIAL); }
-yy7:   yych = *++YYCURSOR;
-yy8:
-#line 158
-       { FINISH; }
-yy9:   yych = *++YYCURSOR;
-       switch(yych){
-       case 'H':       case 'h':       goto yy3;
-       case 'R':       case 'r':       goto yy41;
-       default:        goto yy40;
-       }
-yy10:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'O':       case 'o':       goto yy12;
-       case 'R':       case 'r':       goto yy11;
-       default:        goto yy3;
-       }
-yy11:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'A':       case 'a':       goto yy27;
-       default:        goto yy3;
-       }
-yy12:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'R':       case 'r':       goto yy13;
-       default:        goto yy3;
-       }
-yy13:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'M':       case 'm':       goto yy14;
-       default:        goto yy3;
-       }
-yy14:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'A':       case 'a':       goto yy3;
-       default:        goto yy16;
-       }
-yy15:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy16:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy15;
-       case 'A':       case 'a':       goto yy17;
-       default:        goto yy3;
-       }
-yy17:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'C':       case 'c':       goto yy18;
-       default:        goto yy3;
-       }
-yy18:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'T':       case 't':       goto yy19;
-       default:        goto yy3;
-       }
-yy19:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'I':       case 'i':       goto yy20;
-       default:        goto yy3;
-       }
-yy20:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'O':       case 'o':       goto yy21;
-       default:        goto yy3;
-       }
-yy21:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'N':       case 'n':       goto yy22;
-       default:        goto yy3;
-       }
-yy22:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy23:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy22;
-       case '=':       goto yy24;
-       default:        goto yy3;
-       }
-yy24:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy25:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy24;
-       default:        goto yy26;
-       }
-yy26:
-#line 155
-       { BEGIN(REF); }
-yy27:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'M':       case 'm':       goto yy28;
-       default:        goto yy3;
-       }
-yy28:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'E':       case 'e':       goto yy29;
-       default:        goto yy3;
-       }
-yy29:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'S':       case 's':       goto yy3;
-       default:        goto yy31;
-       }
-yy30:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy31:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy30;
-       case 'S':       case 's':       goto yy32;
-       default:        goto yy3;
-       }
-yy32:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'R':       case 'r':       goto yy33;
-       default:        goto yy3;
-       }
-yy33:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'C':       case 'c':       goto yy34;
-       default:        goto yy3;
-       }
-yy34:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy35:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy34;
-       case '=':       goto yy36;
-       default:        goto yy3;
-       }
-yy36:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy37:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy36;
-       default:        goto yy38;
-       }
-yy38:
-#line 153
-       { BEGIN(REF); }
-yy39:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy40:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy39;
-       case 'H':       case 'h':       goto yy54;
-       default:        goto yy3;
-       }
-yy41:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'E':       case 'e':       goto yy42;
-       default:        goto yy3;
-       }
-yy42:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'A':       case 'a':       goto yy43;
-       default:        goto yy3;
-       }
-yy43:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'H':       case 'h':       goto yy3;
-       default:        goto yy45;
-       }
-yy44:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy45:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy44;
-       case 'H':       case 'h':       goto yy46;
-       default:        goto yy3;
-       }
-yy46:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'R':       case 'r':       goto yy47;
-       default:        goto yy3;
-       }
-yy47:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'E':       case 'e':       goto yy48;
-       default:        goto yy3;
-       }
-yy48:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'F':       case 'f':       goto yy49;
-       default:        goto yy3;
-       }
-yy49:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy50:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy49;
-       case '=':       goto yy51;
-       default:        goto yy3;
-       }
-yy51:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy52:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy51;
-       default:        goto yy53;
-       }
-yy53:
-#line 156
-       { BEGIN(REF); }
-yy54:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'R':       case 'r':       goto yy55;
-       default:        goto yy3;
-       }
-yy55:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'E':       case 'e':       goto yy56;
-       default:        goto yy3;
-       }
-yy56:  yych = *++YYCURSOR;
-       switch(yych){
-       case 'F':       case 'f':       goto yy57;
-       default:        goto yy3;
-       }
-yy57:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy58:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy57;
-       case '=':       goto yy59;
-       default:        goto yy3;
-       }
-yy59:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy60:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy59;
-       default:        goto yy61;
-       }
-yy61:
-#line 154
-       { BEGIN(REF); }
-}
-#line 159
+               case STATE_TAG:
+                       /* collecting the tag name into US.tag */
+                       if(isalnum((unsigned char)*src)) {
+                               *(US.p)++ = *src;
+                               US.l++; 
+                               /* grow as soon as the buffer is full so a terminator always fits */
+                               if(US.l==US.ml) {
+                                       US.ml+=BUFSIZE;
+                                       US.tag=erealloc(US.tag,US.ml);
+                                       US.p = US.tag+US.l;
+                               }
+                       } else if (isspace((unsigned char)*src)) {
+                               US.state = STATE_IN_TAG;
+                               *US.p='\0';
+                               /* shrink to fit, keeping the byte that holds the terminator */
+                               US.tag=erealloc(US.tag,US.l+1);
+                       } else {
+                               /* anything else ends the tag without attributes */
+                               US.state = STATE_NORMAL;
+                               efree(US.tag);
+                               US.tag=NULL;
+                       }
+                       break;
 
+               case STATE_IN_TAG:
+                       if(isalnum(*src)) {
+                               US.state=STATE_TAG_ATTR;
+                               US.ml=BUFSIZE;
+                               US.p=US.attr=erealloc(US.attr,US.ml);
+                               *(US.p)++=*src;
+                               US.l=1;
+                       } else if (! isspace(*src)) {
+                               US.state = STATE_NORMAL;
+                               efree(US.tag);
+                               US.tag=NULL;
+                       }
                        break;
-               case REF: 
-{
-       YYCTYPE yych;
-       unsigned int yyaccept;
-       goto yy62;
-yy63:  ++YYCURSOR;
-yy62:
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-       switch(yych){
-       case '\000':    case '>':       goto yy64;
-       case '\t':      case '\v':
-       case '\f':      case ' ':       case '"':       goto yy65;
-       case '#':       goto yy69;
-       case ':':       goto yy71;
-       default:        goto yy67;
-       }
-yy64:
-#line 163
-       { BEGIN(INITIAL); }
-yy65:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy66:  switch(yych){
-       case '\000':    case '>':       goto yy64;
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy65;
-       case '"':       goto yy79;
-       case '#':       goto yy69;
-       case ':':       goto yy71;
-       default:        goto yy67;
-       }
-yy67:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy68:  switch(yych){
-       case '\000':    case '>':       goto yy64;
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy77;
-       case '"':       goto yy79;
-       case '#':       goto yy69;
-       case ':':       goto yy71;
-       default:        goto yy67;
-       }
-yy69:  yych = *++YYCURSOR;
-yy70:  YYCURSOR -= 1;
-#line 164
-       { BEGIN(INITIAL); }
-yy71:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy72:  switch(yych){
-       case '\000':    case '#':       case '>':       goto yy73;
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy74;
-       case '"':       goto yy76;
-       default:        goto yy71;
-       }
-yy73:
-#line 165
-       { 
-                       /* don't modify absolute links */
-                       state->state = INITIAL; BEGIN(INITIAL); 
-       }
-yy74:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy75:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy74;
-       case '"':       goto yy76;
-       default:        goto yy73;
-       }
-yy76:  yych = *++YYCURSOR;
-       goto yy73;
-yy77:  ++YYCURSOR;
-       if(YYLIMIT == YYCURSOR) YYFILL(1);
-       yych = *YYCURSOR;
-yy78:  switch(yych){
-       case '\t':      case '\v':
-       case '\f':      case ' ':       goto yy77;
-       case '"':       goto yy79;
-       default:        goto yy64;
-       }
-yy79:  yych = *++YYCURSOR;
-       goto yy64;
-}
-#line 169
 
-                       break;
-       }
-nextiter:
-       ;
-       }
-finish:
-       ;
-}
+               case STATE_TAG_ATTR:
+                       /* collecting the attribute name into US.attr */
+                       if(isalnum((unsigned char)*src)) {
+                               *US.p++=*src;
+                               ++US.l;
+                               /* grow as soon as the buffer is full so a terminator always fits */
+                               if(US.l==US.ml) {
+                                       US.ml+=BUFSIZE;
+                                       US.attr=erealloc(US.attr,US.ml);
+                                       US.p = US.attr+US.l;
+                               }
+                       } else if(isspace((unsigned char)*src)||(*src=='=')){
+                               US.state=STATE_TAG_IS;
+                               *US.p=0;
+                               /* shrink to fit, keeping the byte that holds the terminator */
+                               US.attr=erealloc(US.attr,US.l+1);
+                       } else if(*src=='>') {
+                               US.state=STATE_NORMAL;
+                       } else {
+                               /* not a plain attribute name: drop it and look for the next one */
+                               efree(US.attr);
+                               US.attr=NULL;
+                               US.state=STATE_IN_TAG;
+                       }
+                       break;
+                       
+               case STATE_TAG_IS:
+               case STATE_TAG_IS2:
+                       /* after the attribute name: skip blanks and '=' until the value starts */
+                       if(*src=='>') {
+                               /* tag closed before any value was seen */
+                               US.state=STATE_NORMAL;
+                       } else if(!isspace((unsigned char)*src)&&(*src!='=')) {
+                               US.ml=BUFSIZE;
+                               US.p=US.val=erealloc(US.val,US.ml);
+                               US.l=0;
+                               if((*src=='"')||(*src=='\'')) {
+                                       /* quoted value: remember which quote closes it */
+                                       US.state=STATE_TAG_QVAL2;
+                                       US.delim=*src;
+                               } else {
+                                       /* unquoted value: STATE_TAG_VAL2 ends it at the next blank or '>' */
+                                       US.state=STATE_TAG_VAL2;
+                                       *US.p++=*src;
+                                       US.l++;
+                               }
+                       }
+                       break;
 
-char *url_adapt(const char *src, size_t srclen, const char *data, size_t *newlen)
-{
-       lexdata state;
 
-       state.state = INITIAL;
-       state.start = state.crs = src;
-       state.end = src + srclen;
-       state.ptr = NULL;
-       state.target = NULL;
-       state.targetsize = 0;
-       state.data = data;
+               case STATE_TAG_QVAL2:
+                       if(*src==US.delim) {
+                               char *p;
+                               US.state=STATE_IN_TAG;
+                               *US.p='\0';
+                               p=url_attr_addon(US.tag,US.attr,US.val,data);
+                               if(p) {
+                                       int l= strlen(p);
+                                       maxl+=l;
+                                       out=realloc(out,maxl);
+                                       outp=out+*newlen;
+                                       strcpy(outp,p);
+                                       outp+=l;
+                                       *newlen+=l;
+                                       efree(p);
+                               }
+                               break;
+                       } else if(*src=='\\') {
+                               no_output=1;
+                               US.state=STATE_TAG_QVAL2b;
+                       } else if (*src=='>') {
+                               US.state=STATE_NORMAL;
+                       }
+                       
+                       *US.p++=*src;
+                       ++US.l; 
+                       if(US.l==US.ml) {
+                               US.ml+=BUFSIZE;
+                               US.val=erealloc(US.val,US.ml);
+                               US.p = US.val+US.l;
+                       }
+                       
+                       break;
+                       
+               case STATE_TAG_QVAL2b:
+                       US.state=STATE_TAG_QVAL2;
+                       *US.p++=*src;
+                       ++US.l; 
+                       if(US.l==US.ml) {
+                               US.ml+=BUFSIZE;
+                               US.val=erealloc(US.val,US.ml);
+                               US.p = US.val+US.l;
+                       }
+                       break;
 
-       url_scanner(&state);
+               case STATE_TAG_VAL:
+                       if(!isspace(*src)) {
+                               if((*src=='"')||(*src=='\'')) {
+                                       US.state=STATE_TAG_QVAL2;
+                                       US.delim=*src;
+                               } else {
+                                       *US.p++=*src;
+                                       US.l++; 
+                                       if(US.l==US.ml) {
+                                               US.ml+=BUFSIZE;
+                                               US.val=erealloc(US.val,US.ml);
+                                               US.p = US.val+US.l;
+                                       }
+                                       US.state=STATE_TAG_VAL2;
+                               }
+                       }
+                       break;
 
-       if(newlen) *newlen = state.targetsize;
+               case STATE_TAG_VAL2:
+                       if(isspace(*src)||(*src=='>')) {
+                               char *p;
+                               US.state=(*src=='>')?STATE_NORMAL:STATE_IN_TAG;
+                               *US.p='\0';
+                               p=url_attr_addon(US.tag,US.attr,US.val,data);
+                               if(p) {
+                                       int l= strlen(p);
+                                       maxl+=l;
+                                       out=realloc(out,maxl);
+                                       outp=out+*newlen;
+                                       strcpy(outp,p);
+                                       outp+=l;
+                                       *newlen+=l;
+                                       efree(p);
+                               }
+                       } else {
+                               *US.p++=*src;
+                               US.l++; 
+                               if(US.l==US.ml) {
+                                       US.ml+=BUFSIZE;
+                                       US.val=erealloc(US.val,US.ml);
+                                       US.p = US.val+US.l;
+                               }
+                       }
+                       break;
+               }
 
-       return state.target;
+               if(no_output) {
+                       src++;
+                       no_output=0;
+                       continue;
+               } 
+               *outp++=*src++;
+               *newlen+=1;
+       }
+       *outp='\0';
+       return out;
 }
 
 #endif
index 3c0e7b29a59f62a6542da38019206defa6633824..b7e3af38b25e15ebf7ac205bff3e6cfc3d0ebd6f 100644 (file)
 
 char *url_adapt(const char *src, size_t srclen, const char *data, size_t *newlen);
 
+enum url_state { /* states of the scanner in url_adapt() */
+       STATE_NORMAL, /* outside of any tag, waiting for '<' */
+       STATE_TAG_START, /* at the character right after '<' */
+       STATE_TAG, /* collecting the tag name */
+       STATE_IN_TAG, /* inside a tag, between attributes */
+       STATE_TAG_ATTR, /* collecting an attribute name */
+       STATE_TAG_IS, /* attribute name done, waiting for the value */
+       STATE_TAG_IS2, /* handled by the same case as STATE_TAG_IS */
+       STATE_TAG_VAL, /* unquoted attribute value (start) */
+       STATE_TAG_VAL2, /* unquoted attribute value, ended by a blank or '>' */
+       STATE_TAG_QVAL1, /* NOTE(review): no case for this value in url_adapt() as shown -- confirm it is unused */
+       STATE_TAG_QVAL2, /* inside a quoted value, ended by the opening quote character */
+       STATE_TAG_QVAL2b /* at the character following a backslash inside a quoted value */
+};
+
+typedef struct url_adapt_struct { /* scanner state url_adapt() keeps until it is called with src == NULL */
+       enum url_state state; /* current scanner state */
+       char *tag; /* name of the tag being scanned, or NULL */
+       char *attr; /* name of the attribute being scanned, or NULL */
+       char *val; /* value of the attribute being scanned, or NULL */
+       char delim; /* quote character that opened the current quoted value */
+       char *p; /* write position inside the buffer currently being filled */
+       int l,ml; /* characters stored in, and allocated size of, that buffer */
+} url_adapt_state_t;
+
 #endif