]> granicus.if.org Git - php/commitdiff
fixed possible bufferoverflow in get_next_char
authorStefan Esser <sesser@php.net>
Sat, 16 Mar 2002 09:44:30 +0000 (09:44 +0000)
committerStefan Esser <sesser@php.net>
Sat, 16 Mar 2002 09:44:30 +0000 (09:44 +0000)
malformed input to htmlentities/htmlspecialchars
with utf-8 encoding crashed the server

ex: htmlentities("\xfd...(30times)", ENT_NOQUOTES, "utf-8");

ext/standard/html.c

index 76f40d5c6511b0a62c62e242e6e6760979b4631f..22756953c975da0be86dbc3027bb9d5171b2baa6 100644 (file)
@@ -137,21 +137,38 @@ static const struct {
        { 0, NULL, 0, 0 }
 };
        
+#define MB_RETURN { \
+                       *newpos = pos;       \
+                       mbseq[mbpos] = '\0'; \
+                       *mbseqlen = mbpos;   \
+                       return this_char; }
+                                       
+#define MB_WRITE(mbchar) { \
+                       mbspace--;  \
+                       if (mbspace == 0) {      \
+                               MB_RETURN;           \
+                       }                        \
+                       mbseq[mbpos++] = (mbchar); }
 
 /* {{{ get_next_char
  */
 inline static unsigned short get_next_char(enum entity_charset charset,
-               unsigned char *str,
-               int *newpos,
-               unsigned char *mbseq,
-               int *mbseqlen
-)
+               unsigned char * str,
+               int * newpos,
+               unsigned char * mbseq,
+               int * mbseqlen)
 {
        int pos = *newpos;
        int mbpos = 0;
+       int mbspace = *mbseqlen;
        unsigned short this_char = str[pos++];
        
-       mbseq[mbpos++] = (unsigned char)this_char;
+       if (mbspace <= 0) {
+               *mbseqlen = 0;
+               return this_char;
+       }
+       
+       MB_WRITE((unsigned char)this_char);
        
        switch(charset) {
                case cs_utf_8:
@@ -232,7 +249,7 @@ inline static unsigned short get_next_char(enum entity_charset charset,
                                        if (more)
                                        {
                                                this_char = str[pos++];
-                                               mbseq[mbpos++] = (unsigned char)this_char;
+                                               MB_WRITE((unsigned char)this_char);
                                        }
                                } while(more);
                        }
@@ -250,7 +267,7 @@ inline static unsigned short get_next_char(enum entity_charset charset,
                                        {
                                                /* yes, this a wide char */
                                                this_char <<= 8;
-                                               mbseq[mbpos++] = next_char;
+                                               MB_WRITE(next_char);
                                                this_char |= next_char;
                                                pos++;
                                        }
@@ -271,7 +288,7 @@ inline static unsigned short get_next_char(enum entity_charset charset,
                                        {
                                                /* yes, this a wide char */
                                                this_char <<= 8;
-                                               mbseq[mbpos++] = next_char;
+                                               MB_WRITE(next_char);
                                                this_char |= next_char;
                                                pos++;
                                        }
@@ -289,7 +306,7 @@ inline static unsigned short get_next_char(enum entity_charset charset,
                                        {
                                                /* yes, this a jis kanji char */
                                                this_char <<= 8;
-                                               mbseq[mbpos++] = next_char;
+                                               MB_WRITE(next_char);
                                                this_char |= next_char;
                                                pos++;
                                        }
@@ -301,7 +318,7 @@ inline static unsigned short get_next_char(enum entity_charset charset,
                                        {
                                                /* JIS X 0201 kana */
                                                this_char <<= 8;
-                                               mbseq[mbpos++] = next_char;
+                                               MB_WRITE(next_char);
                                                this_char |= next_char;
                                                pos++;
                                        }
@@ -315,12 +332,13 @@ inline static unsigned short get_next_char(enum entity_charset charset,
                                        {
                                                /* JIS X 0212 hojo-kanji */
                                                this_char <<= 8;
-                                               mbseq[mbpos++] = next_char;
+                                               MB_WRITE(next_char);
                                                this_char |= next_char;
+                                               pos++;
                                                this_char <<= 8;
-                                               mbseq[mbpos++] = next2_char;
+                                               MB_WRITE(next2_char);
                                                this_char |= next2_char;
-                                               pos+=2;
+                                               pos++;
                                        }
                                        
                                }
@@ -331,10 +349,7 @@ inline static unsigned short get_next_char(enum entity_charset charset,
                                break;
                        }
        }
-       *newpos = pos;
-       mbseq[mbpos] = '\0';
-       *mbseqlen = mbpos;
-       return this_char;
+       MB_RETURN;
 }
 /* }}} */
 
@@ -489,8 +504,8 @@ PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newle
 
        i = 0;
        while (i < oldlen) {
-               int mbseqlen;
                unsigned char mbsequence[16];   /* allow up to 15 characters in a multibyte sequence */
+               int mbseqlen = sizeof(mbsequence);
                unsigned short this_char = get_next_char(charset, old, &i, mbsequence, &mbseqlen);
 
                matches_map = 0;