]> granicus.if.org Git - php/commitdiff
Added sscanf() function.
author Clayton Collie <ccollie@php.net>
Tue, 6 Jun 2000 18:58:15 +0000 (18:58 +0000)
committer Clayton Collie <ccollie@php.net>
Tue, 6 Jun 2000 18:58:15 +0000 (18:58 +0000)
ext/standard/basic_functions.c
ext/standard/file.c
ext/standard/file.h
ext/standard/php_string.h
ext/standard/scanf.c [new file with mode: 0644]
ext/standard/scanf.h [new file with mode: 0644]
ext/standard/string.c

index f994b7054503d567e57fdb5be7950774fb218b4a..334cee2a1ce4f86e321d2b029e0a6225d68e2d22 100644 (file)
@@ -173,7 +173,9 @@ function_entry basic_functions[] = {
        PHP_FALIAS(strchr,                      strstr,                         NULL)
        PHP_NAMED_FE(sprintf,           PHP_FN(user_sprintf),   NULL)
        PHP_NAMED_FE(printf,            PHP_FN(user_printf),    NULL)
-       
+    PHP_FE(sscanf,                                  NULL)
+    PHP_FE(fscanf,                                  NULL)
+
        PHP_FE(parse_url,                                                               NULL)
        PHP_FE(urlencode,                                                               NULL)
        PHP_FE(urldecode,                                                               NULL)
index 232b1ee048dd6d743c55c46d764476eba36fac8f..bd7b7f90cbd542546fd44e3ac91d6c87b76564fb 100644 (file)
@@ -86,6 +86,9 @@ extern int fclose();
 #endif
 
 #include "php_realpath.h"
+#include "scanf.h"
+#include "zend_API.h"
+
 
 /* }}} */
 /* {{{ ZTS-stuff / Globals / Prototypes */
@@ -999,6 +1002,7 @@ PHP_FUNCTION(fgetc) {
 }
 
 /* }}} */
+
 /* {{{ proto string fgetss(int fp, int length [, string allowable_tags])
    Get a line from file pointer and strip HTML tags */
 
@@ -1060,6 +1064,74 @@ PHP_FUNCTION(fgetss)
 }
 
 /* }}} */
+/* {{{ proto mixed fscanf(int fp, string format [, mixed var ...])
+     Implements a mostly ANSI compatible fscanf(): reads one line from the
+     given file handle and scans it according to format. */
+PHP_FUNCTION(fscanf)
+{
+    int  result;
+    pval **file_handle, **format_string;
+    int len, type;
+    char *buf;
+    int issock=0;
+    int socketd=0;
+    void *what;
+
+    zval ***args;
+    int argCount;
+    PLS_FETCH();
+
+    /* the file handle and the format string are mandatory */
+    argCount = ZEND_NUM_ARGS();
+    if (argCount < 2) {
+        WRONG_PARAM_COUNT;
+    }
+    args = (zval ***)emalloc(argCount * sizeof(zval **));
+    if (!args) {
+        /* nothing was allocated, so there is nothing to release */
+        WRONG_PARAM_COUNT;
+    }
+    if (zend_get_parameters_array_ex(argCount,args) == FAILURE) {
+        efree( args );
+        WRONG_PARAM_COUNT;
+    }
+
+    file_handle    = args[0];
+    format_string  = args[1];
+
+    what = zend_fetch_resource(file_handle,-1,"File-Handle",&type,3,le_fopen,le_popen,le_socket);
+
+    /*
+     * we can't do a ZEND_VERIFY_RESOURCE(what), otherwise we end up
+     * with a leak if we have an invalid filehandle. This needs changing
+     * if the code behind ZEND_VERIFY_RESOURCE changed. - cc
+     */
+    if (!what) {
+        efree(args);
+        RETURN_FALSE;
+    }
+
+    len = SCAN_MAX_FSCANF_BUFSIZE;
+
+    if (type == le_socket) {
+        issock=1;
+        socketd=*(int*)what;
+    }
+    buf = emalloc(sizeof(char) * (len + 1));
+    /* needed because recv doesn't put a null at the end */
+    memset(buf,0,len+1);
+    if (FP_FGETS(buf, len, socketd, (FILE*)what, issock) == NULL) {
+        /*
+         * Nothing could be read. Both the line buffer and the argument
+         * array have to be released here; the argument array used to be
+         * leaked on this path.
+         */
+        efree(args);
+        efree(buf);
+        RETURN_FALSE;
+    }
+
+    /* the scanner fills in return_value and/or the by-reference args */
+    convert_to_string_ex( format_string );
+    result = php_sscanf_internal( buf,(*format_string)->value.str.val,
+                    argCount,args, 2,&return_value);
+    efree(args);
+    efree(buf);
+    if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
+        WRONG_PARAM_COUNT;
+    }
+}
+/* }}} */
+
 /* {{{ proto int fwrite(int fp, string str [, int length])
    Binary-safe file write */
 
@@ -1858,6 +1930,8 @@ PHP_FUNCTION(realpath)
 }
 /* }}} */
 
+
+
 #if 0
 
 static fd_set readfd;
index 334e69af2b2073961c1a2edc2bca94bdbbb3bbe5..adf9d9b028f82afa491d308c775a3b5e5cd30c7d 100644 (file)
@@ -46,6 +46,7 @@ PHP_FUNCTION(feof);
 PHP_FUNCTION(fread);
 PHP_FUNCTION(fgetc);
 PHP_FUNCTION(fgets);
+PHP_FUNCTION(fscanf);
 PHP_FUNCTION(fgetss);
 PHP_FUNCTION(fgetcsv);
 PHP_FUNCTION(fwrite);
index 6de3f38b83ed12a07cbdbc69e49efcc21e50913f..ea03855cdcf091465800501d6e8f79e85ba18b8b 100644 (file)
@@ -86,6 +86,7 @@ PHP_FUNCTION(substr_replace);
 PHP_FUNCTION(strnatcmp);
 PHP_FUNCTION(strnatcasecmp);
 PHP_FUNCTION(substr_count);
+PHP_FUNCTION(sscanf);
 
 #define strnatcmp(a, b) \
        strnatcmp_ex(a, strlen(a), b, strlen(b), 0)
diff --git a/ext/standard/scanf.c b/ext/standard/scanf.c
new file mode 100644 (file)
index 0000000..5056123
--- /dev/null
@@ -0,0 +1,1241 @@
+/* 
+   +----------------------------------------------------------------------+
+   | PHP version 4.0                                                      |
+   +----------------------------------------------------------------------+
+   | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group                   |
+   +----------------------------------------------------------------------+
+   | This source file is subject to version 2.02 of the PHP license,      |
+   | that is bundled with this package in the file LICENSE, and is        |
+   | available at through the world-wide-web at                           |
+   | http://www.php.net/license/2_02.txt.                                 |
+   | If you did not receive a copy of the PHP license and are unable to   |
+   | obtain it through the world-wide-web, please send a note to          |
+   | license@php.net so we can mail you a copy immediately.               |
+   +----------------------------------------------------------------------+
+   | Authors: clayton collie <clcollie@mindspring.com>                    |
+   +----------------------------------------------------------------------+
+ */
+
+/*
+   scanf.c --
+    This file contains the base code which implements sscanf and by extension
+    fscanf. Original code is from TCL8.3.0 and bears the following copyright
+       This software is copyrighted by the Regents of the University of
+       California, Sun Microsystems, Inc., Scriptics Corporation,
+       and other parties.  The following terms apply to all files associated
+       with the software unless explicitly disclaimed in individual files.
+
+       The authors hereby grant permission to use, copy, modify, distribute,
+       and license this software and its documentation for any purpose, provided
+       that existing copyright notices are retained in all copies and that this
+       notice is included verbatim in any distributions. No written agreement,
+       license, or royalty fee is required for any of the authorized uses.
+       Modifications to this software may be copyrighted by their authors
+       and need not follow the licensing terms described here, provided that
+       the new terms are clearly indicated on the first page of each file where
+       they apply.
+
+       IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
+       FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+       ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
+       DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
+       POSSIBILITY OF SUCH DAMAGE.
+       
+       THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
+       INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+       FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
+       IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
+       NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
+       MODIFICATIONS.
+
+       GOVERNMENT USE: If you are acquiring this software on behalf of the
+       U.S. government, the Government shall have only "Restricted Rights"
+       in the software and related documentation as defined in the Federal 
+       Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
+       are acquiring the software on behalf of the Department of Defense, the
+       software shall be classified as "Commercial Computer Software" and the
+       Government shall have only "Restricted Rights" as defined in Clause
+       252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
+       authors grant the U.S. Government and others acting in its behalf
+       permission to use and distribute the software in accordance with the
+       terms specified in this license. 
+ */
+#include <stdio.h>
+#include <limits.h>
+#include <ctype.h>
+#include "php.h"
+#include "php_variables.h"
+#ifdef HAVE_LOCALE_H
+#include <locale.h> 
+#endif
+#include "zend_execute.h"
+#include "zend_operators.h"
+#include "php_globals.h"
+#include "basic_functions.h"
+#include "scanf.h"
+
+/*
+ * Flag values used internally by [f|s]canf.
+ */
+
+#define SCAN_NOSKIP     0x1       /* Don't skip blanks. */
+#define SCAN_SUPPRESS  0x2       /* Suppress assignment. */
+#define SCAN_UNSIGNED  0x4       /* Read an unsigned value. */
+#define SCAN_WIDTH      0x8       /* A width value was supplied. */
+
+#define SCAN_SIGNOK     0x10      /* A +/- character is allowed. */
+#define SCAN_NODIGITS   0x20      /* No digits have been scanned. */
+#define SCAN_NOZERO     0x40      /* No zero digits have been scanned. */
+#define SCAN_XOK        0x80      /* An 'x' is allowed. */
+#define SCAN_PTOK       0x100     /* Decimal point is allowed. */
+#define SCAN_EXPOK      0x200     /* An exponent is allowed. */
+
+#define UCHAR(x)               (zend_uchar)(x)
+
+
+
+/*
+ * The following structure contains the information associated with
+ * a character set.
+ *
+ * A set is the union of a list of individual characters (chars) and a
+ * list of inclusive character ranges (ranges).  It is filled in by
+ * BuildCharSet(), queried by CharInSet() and its two arrays are freed
+ * by ReleaseCharSet().
+ */
+
+typedef struct CharSet {
+    int exclude;               /* 1 if this is an exclusion set. */
+    int nchars;                /* Number of entries used in chars. */
+    char *chars;               /* Individually listed characters (not NUL terminated). */
+    int nranges;               /* Number of entries used in ranges. */
+    struct Range {
+        char start;            /* Lower bound of the range (inclusive). */
+        char end;              /* Upper bound of the range (inclusive). */
+    } *ranges;                 /* Range list, or NULL when the set has no ranges. */
+} CharSet;
+
+/*
+ * Declarations for functions used only in this file.
+ */
+
+static char *BuildCharSet(CharSet *cset, char *format);
+static int     CharInSet(CharSet *cset, int ch);
+static void    ReleaseCharSet(CharSet *cset);
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * BuildCharSet --
+ *
+ *     This function examines a character set format specification
+ *     and builds a CharSet containing the individual characters and
+ *     character ranges specified.
+ *
+ * Parameters:
+ *     cset    The character set to fill in.  It is zeroed first, so any
+ *             previous contents are overwritten without being released.
+ *     format  The set specification, starting with the character that
+ *             follows the opening '['.  It must contain a closing ']':
+ *             this function never tests for the end of the string.
+ *
+ * Results:
+ *     Returns the next format position, i.e. the character just past
+ *     the ']' that closes the set.
+ *
+ * Side effects:
+ *     Initializes the charset.  cset->chars (and cset->ranges, when at
+ *     least one '-' was seen) are allocated with emalloc and have to be
+ *     released with ReleaseCharSet().
+ *
+ *----------------------------------------------------------------------
+ */
+
+static char * BuildCharSet(CharSet *cset, char *format)
+{
+    char *ch, start;         /* current character / candidate start of a range */
+    int  nranges;            /* number of '-' seen, upper bound on the ranges */
+    char *end;               /* look-ahead cursor used to find the closing ']' */
+
+    memset(cset, 0, sizeof(CharSet));
+    
+    ch = format;
+    if (*ch == '^') {
+        cset->exclude = 1;
+        ch = ++format;
+    }
+    end = format + 1;        /* verify this - cc */
+
+    /*
+     * Find the close bracket so we can overallocate the set.
+     *
+     * A ']' in the very first position is a literal member of the set
+     * rather than its terminator, so it is stepped over before the
+     * search starts.  Every '-' met on the way is counted as a potential
+     * range, which gives an upper bound for the size of the range list.
+     */
+
+    if (*ch == ']') {
+        ch = end++;
+    }
+    nranges = 0;
+    while (*ch != ']') {
+        if (*ch == '-') {
+            nranges++;
+        }
+        ch = end++;
+    }
+
+    cset->chars = (char *) emalloc(sizeof(char) * (end - format - 1));    /* one slot per character in front of the closing ']' */
+    if (nranges > 0) {
+        cset->ranges = (struct Range *) emalloc(sizeof(struct Range)*nranges);
+    } else {
+        cset->ranges = NULL;
+    }
+
+    /*
+     * Now build the character set.
+     *
+     * From here on ch is the character being examined and format always
+     * points at the character that follows it.  A leading ']' or '-' is
+     * stored as an ordinary character.
+     */
+
+    cset->nchars = cset->nranges = 0;
+    ch    = format++;
+    start = *ch;
+    if (*ch == ']' || *ch == '-') {
+        cset->chars[cset->nchars++] = *ch;
+        ch = format++;
+    }
+    while (*ch != ']') {
+        if (*format == '-') {
+            /*
+             * This may be the first character of a range, so don't add
+             * it yet.
+             */
+
+            start = *ch;
+        } else if (*ch == '-') {
+            /*
+             * Check to see if this is the last character in the set, in which
+             * case it is not a range and we should add the previous character
+             * as well as the dash.
+             */
+
+            if (*format == ']') {
+                cset->chars[cset->nchars++] = start;
+                cset->chars[cset->nchars++] = *ch;
+            } else {
+                ch = format++;
+
+                /*
+                 * Check to see if the range is in reverse order.
+                 * The bounds are stored so that start <= end either way.
+                 */
+
+                if (start < *ch) {
+                    cset->ranges[cset->nranges].start = start;
+                    cset->ranges[cset->nranges].end = *ch;
+                } else {
+                    cset->ranges[cset->nranges].start = *ch;
+                    cset->ranges[cset->nranges].end = start;
+                }
+                cset->nranges++;
+           }
+        } else {
+            cset->chars[cset->nchars++] = *ch;
+        }
+        ch = format++;
+    }
+    return format;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * CharInSet --
+ *
+ *     Test a single character against a character set.
+ *
+ * Parameters:
+ *     cset    The set to test against.
+ *     c       The character to look up; it is narrowed to char first.
+ *
+ * Results:
+ *     For a normal set: 1 if the character is one of the listed
+ *     characters or lies inside one of the inclusive ranges, else 0.
+ *     For an exclusion set the answer is inverted.
+ *
+ * Side effects:
+ *     None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int CharInSet(CharSet *cset, int c)
+{
+    const char needle = (char) c;
+    int found = 0;
+    int idx;
+
+    /* First look through the individually listed characters. */
+    for (idx = 0; !found && idx < cset->nchars; idx++) {
+        found = (cset->chars[idx] == needle);
+    }
+
+    /* Only consult the ranges when the plain list did not hit. */
+    for (idx = 0; !found && idx < cset->nranges; idx++) {
+        const struct Range *span = &cset->ranges[idx];
+
+        found = (span->start <= needle) && (needle <= span->end);
+    }
+
+    if (cset->exclude) {
+        return !found;
+    }
+    return found;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * ReleaseCharSet --
+ *
+ *     Give back the memory held by a character set.
+ *
+ * Parameters:
+ *     cset    The set whose arrays are to be freed.  The CharSet
+ *             structure itself is not freed.
+ *
+ * Results:
+ *     None.
+ *
+ * Side effects:
+ *     cset->chars is passed to efree unconditionally, cset->ranges only
+ *     when it is not NULL.  Neither pointer is reset afterwards.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void ReleaseCharSet(CharSet *cset)
+{
+    struct Range *range_store = cset->ranges;
+
+    efree((char *)cset->chars);
+
+    /* the range list only exists for sets that contained a '-' */
+    if (range_store != NULL) {
+        efree((char *)range_store);
+    }
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * ValidateFormat --
+ *
+ *     Parse the format string and verify that it is properly formed
+ *     and that there are exactly enough variables on the command line.
+ *
+ * Results:
+ *     SCAN_SUCCESS if the format is acceptable, otherwise
+ *     SCAN_ERROR_INVALID_FORMAT.
+ *
+ * Side effects:
+ *     Raises an E_WARNING through php_error on abnormal conditions.
+ *
+ * Parameters :
+ *     format     The format string.
+ *     numVars    The number of variables passed to the scan command
+ *                (0 when the results are to be returned as an array).
+ *     totalSubs  If not NULL, receives the number of variables that
+ *                will be required.  It is only written once the whole
+ *                format string has been parsed without error.
+ *
+ *----------------------------------------------------------------------
+*/
+
+
+PHPAPI int ValidateFormat(char *format,int numVars,int *totalSubs)
+{
+#define STATIC_LIST_SIZE 16
+    int gotXpg, gotSequential, value, i, flags;
+    char *end, *ch = NULL;
+    int staticAssign[STATIC_LIST_SIZE];
+    int *nassign = staticAssign;
+    int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
+
+    /*
+     * Initialize an array that records the number of times a variable
+     * is assigned to by the format string.  We use this to detect if
+     * a variable is multiply assigned or left unassigned.
+     */
+
+    if (numVars > nspace) {
+        nassign = (int*)emalloc(sizeof(int) * numVars);
+        nspace = numVars;
+    }
+    for (i = 0; i < nspace; i++) {
+        nassign[i] = 0;
+    }
+
+    xpgSize = objIndex = gotXpg = gotSequential = 0;
+
+    while (*format != '\0') {
+        ch = format++;
+        flags = 0;
+
+        if (*ch != '%') {
+            continue;
+        }
+        ch = format++;
+        if (*ch == '%') {
+            continue;
+        }
+        if (*ch == '*') {
+            flags |= SCAN_SUPPRESS;
+            ch = format++;
+            goto xpgCheckDone;
+        }
+
+        /* UCHAR keeps a negative char value away from isdigit() */
+        if (isdigit(UCHAR(*ch))) {
+            /*
+             * Check for an XPG3-style %n$ specification.  Note: there
+             * must not be a mixture of XPG3 specs and non-XPG3 specs
+             * in the same format string.
+             */
+
+            value = strtoul(format-1, &end, 10);
+            if (*end != '$') {
+                goto notXpg;
+            }
+            format = end+1;
+            ch     = format++;
+            gotXpg = 1;
+            if (gotSequential) {
+                goto mixedXPG;
+            }
+            objIndex = value - 1;
+            if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
+                goto badIndex;
+            } else if (numVars == 0) {
+                /*
+                 * In the case where no vars are specified, the user can
+                 * specify %9999$ legally, so we have to consider special
+                 * rules for growing the assign array.  'value' is
+                 * guaranteed to be > 0.
+                 */
+
+                /* set a lower artificial limit on this
+                 * in the interest of security and resource friendliness
+                 * 255 arguments should be more than enough. - cc
+                 */
+                if (value > SCAN_MAX_ARGS) {
+                    goto badIndex;
+                }
+
+                xpgSize = (xpgSize > value) ? xpgSize : value;
+            }
+            goto xpgCheckDone;
+        }
+
+      notXpg:
+        gotSequential = 1;
+        if (gotXpg) {
+          mixedXPG:
+            /*
+             * The message is itself a format string for php_error, so
+             * every literal '%' has to be written as "%%".
+             */
+            php_error(E_WARNING,
+                "cannot mix \"%%\" and \"%%n$\" conversion specifiers in %s", get_active_function_name() );
+            goto error;
+        }
+
+      xpgCheckDone:
+        /*
+         * Parse any width specifier.
+         */
+
+        if (isdigit(UCHAR(*ch))) {
+            value = strtoul(format-1, &format, 10);
+            flags |= SCAN_WIDTH;
+            ch = format++;
+        }
+
+        /*
+         * Ignore size specifier.
+         */
+
+        if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
+            ch = format++;
+        }
+
+        if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
+            goto badIndex;
+        }
+
+        /*
+         * Handle the various field types.
+         */
+
+        switch (*ch) {
+            case 'n':
+            case 'd':
+            case 'D':
+            case 'i':
+            case 'o':
+            case 'x':
+            case 'X':
+            case 'u':
+            case 'f':
+            case 'e':
+            case 'E':
+            case 'g':
+            case 's':
+                break;
+            case 'c':
+                /* we differ here with the TCL implementation in allowing for */
+                /* a character width specification,to be more consistent with */
+                /* ANSI. since Zend auto allocates space for vars, this is no */
+                /* problem - cc                                               */
+                /*
+                if (flags & SCAN_WIDTH) {
+                    php_error(E_WARNING, "field width may not be specified in %c conversion");
+                    goto error;
+                }
+                */
+                break;
+            case '[':
+                /*
+                 * Only check that the set is terminated; a ']' directly
+                 * after '[' or '[^' is a member of the set, not its end.
+                 */
+                if (*format == '\0') {
+                    goto badSet;
+                }
+                ch = format++;
+                if (*ch == '^') {
+                    if (*format == '\0') {
+                        goto badSet;
+                    }
+                    ch = format++;
+                }
+                if (*ch == ']') {
+                    if (*format == '\0') {
+                        goto badSet;
+                    }
+                    ch = format++;
+                }
+                while (*ch != ']') {
+                    if (*format == '\0') {
+                        goto badSet;
+                    }
+                    ch = format++;
+                }
+                break;
+              badSet:
+                php_error(E_WARNING, "unmatched [ in format string");
+                goto error;
+            default:
+                /* report the offending character itself, not the pointer */
+                php_error(E_WARNING,"bad scan conversion character \"%c\"", *ch);
+                goto error;
+        }
+
+        if (!(flags & SCAN_SUPPRESS)) {
+            if (objIndex >= nspace) {
+                /*
+                 * Expand the nassign buffer.  If we are using XPG specifiers,
+                 * make sure that we grow to a large enough size.  xpgSize is
+                 * guaranteed to be at least one larger than objIndex.
+                 */
+                value = nspace;
+                if (xpgSize) {
+                    nspace = xpgSize;
+                } else {
+                    nspace += STATIC_LIST_SIZE;
+                }
+                if (nassign == staticAssign) {
+                    nassign = (void *)emalloc(nspace * sizeof(int));
+                    for (i = 0; i < STATIC_LIST_SIZE; ++i) {
+                        nassign[i] = staticAssign[i];
+                    }
+                } else {
+                    nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
+                }
+                for (i = value; i < nspace; i++) {
+                    nassign[i] = 0;
+                }
+            }
+            nassign[objIndex]++;
+            objIndex++;
+        }
+    }  /* while (*format != '\0') */
+
+    /*
+     * Verify that all of the variable were assigned exactly once.
+     */
+
+    if (numVars == 0) {
+        if (xpgSize) {
+            numVars = xpgSize;
+        } else {
+            numVars = objIndex;
+        }
+    }
+    if (totalSubs) {
+        *totalSubs = numVars;
+    }
+    for (i = 0; i < numVars; i++) {
+        if (nassign[i] > 1) {
+            php_error(E_WARNING, "variable is assigned by multiple \"%%n$\" conversion specifiers");
+            goto error;
+        } else if (!xpgSize && (nassign[i] == 0)) {
+            /*
+             * If the space is empty, and xpgSize is 0 (means XPG wasn't
+             * used, and/or numVars != 0), then too many vars were given
+             */
+            php_error(E_WARNING, "variable is not assigned by any conversion specifiers");
+            goto error;
+        }
+    }
+
+    if (nassign != staticAssign) {
+        efree((char *)nassign);
+    }
+    return SCAN_SUCCESS;
+
+    badIndex:
+        if (gotXpg) {
+            php_error(E_WARNING, "\"%%n$\" argument index out of range");
+        } else {
+            php_error(E_WARNING,"different numbers of variable names and field specifiers");
+        }
+
+    error:
+        if (nassign != staticAssign) {
+            efree((char *)nassign);
+        }
+    return SCAN_ERROR_INVALID_FORMAT;
+#undef STATIC_LIST_SIZE
+}
+
+
+
+/* This is the internal function which does processing on behalf of
+ * both sscanf() and fscanf()
+ * 
+ * parameters :
+ *             string          literal string to be processed
+ *             format          format string
+ *             argCount        total number of elements in the args array
+ *             args            arguments passed in from user function (f|s)scanf
+ *             varStart        offset (in args) of 1st variable passed in to (f|s)scanf
+ *             return_value set with the results of the scan
+ */
+
+PHPAPI int php_sscanf_internal(        char *string,char *format,
+                               int argCount,zval ***args,
+                               int varStart,pval **return_value)
+{
+    int  numVars, nconversions, totalVars = -1;
+    int  i, value, result;
+    int  objIndex;
+    char *end, *baseString;
+    zval **current;
+    char op   = 0;
+    int  base = 0;
+    int  underflow = 0;
+    size_t width;
+    long (*fn)() = NULL;
+    char *ch, sch;
+    int  flags;
+    char buf[64];      /* Temporary buffer to hold scanned
+                        * number strings before they are
+                        * passed to strtoul. */
+
+       
+       /* do some sanity checking */
+       if ((varStart > argCount) || (varStart < 0)){
+               varStart = SCAN_MAX_ARGS + 1;
+       }
+       numVars = argCount - varStart;
+       if (numVars < 0) {
+           numVars = 0;
+       }
+       
+#if 0 
+       zend_printf("<br>in sscanf_internal : <br> string is \"%s\",format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
+                                       string,format,numVars,varStart);        
+#endif 
+    /*
+     * Check for errors in the format string.
+     */
+    if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
+               scan_set_error_return( numVars, return_value ); 
+        return SCAN_ERROR_INVALID_FORMAT;
+    }
+
+       objIndex = numVars ? varStart : 0; 
+
+    /*
+        * If any variables are passed, make sure they are all passed by reference
+        */ 
+       if (numVars) {
+               for (i = varStart;i < argCount;i++){
+                       if ( ! PZVAL_IS_REF( *args[ i ] ) ) {   
+                               php_error(E_WARNING,"Parameter %d to %s() must be passed by reference",
+                                                               i, get_active_function_name());                 
+                               scan_set_error_return(numVars, return_value);
+                               return SCAN_ERROR_VAR_PASSED_BYVAL;
+                       }
+               }
+       }
+       
+       
+       /*
+     * Allocate space for the result objects. Only happens when no variables
+        * are specified
+     */
+
+    if (!numVars) {
+        /* allocate an array for return */
+        if (array_init(*return_value) == FAILURE) {
+                       scan_set_error_return(0, return_value);                 
+            return FAILURE;
+        }
+        for (i = 0; i < totalVars; i++) {
+               if (add_next_index_unset(*return_value) == FAILURE) {
+                               scan_set_error_return(0, return_value);
+                               return FAILURE;
+               }
+        }
+    }
+
+    baseString = string;
+
+    /*
+     * Iterate over the format string filling in the result objects until
+     * we reach the end of input, the end of the format string, or there
+     * is a mismatch.
+     */
+
+    nconversions = 0;
+       /* note ! - we need to limit the loop for objIndex to keep it in bounds */
+
+       while (*format != '\0') {
+
+       ch    = format++;
+
+       flags = 0;
+
+      /*
+       * If we see whitespace in the format, skip whitespace in the string.
+       */
+
+        if ( isspace( (int)*ch ) ) {
+            sch = *string;
+            while ( isspace( (int)sch ) ) {
+                if (*string == '\0') {
+                    goto done;
+                }
+                string++;
+                sch = *string;
+            }
+            continue;
+        }
+           
+        if (*ch != '%') {
+         literal:
+            if (*string == '\0') {
+                underflow = 1;
+                goto done;
+            }
+            sch = *string;
+            string++;
+            if (*ch != sch) {
+                goto done;
+            }
+            continue;
+        }
+
+        ch = format++;
+        if (*ch == '%') {
+            goto literal;
+        }
+
+        /*
+         * Check for assignment suppression ('*') or an XPG3-style
+         * assignment ('%n$').
+         */
+
+        if (*ch == '*') {
+            flags |= SCAN_SUPPRESS;
+            ch = format++;
+         } else if ( isdigit(UCHAR(*ch))) { 
+                value = strtoul(format-1, &end, 10); 
+                if (*end == '$') {
+                    format = end+1;
+                    ch = format++;
+                    objIndex = varStart + value;
+                }
+         }
+
+        /*
+         * Parse any width specifier.
+         */
+
+        if ( isdigit(UCHAR(*ch))) { 
+            width = strtoul(format-1, &format, 10); 
+            ch = format++;
+        } else {
+            width = 0;
+        }
+
+        /*
+         * Ignore size specifier.
+         */
+
+        if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
+            ch = format++;
+        }
+
+        /*
+         * Handle the various field types.
+         */
+
+        switch (*ch) {
+            case 'n':
+                if (!(flags & SCAN_SUPPRESS)) {
+                    if (numVars) {
+                        current = args[objIndex++];
+                        convert_to_long( *current );
+                        ZVAL_STRINGL( *current, string, end-string, 1);
+                    } else {
+                        add_index_long(*return_value, objIndex++, string - baseString);
+                    }
+                }
+                nconversions++;
+                continue;
+
+            case 'd':
+               case 'D':       
+                op = 'i';
+                base = 10;
+                fn = (long (*)())strtol;
+                break;
+            case 'i':
+                op = 'i';
+                base = 0;
+                fn = (long (*)())strtol;
+                break;
+            case 'o':
+                op = 'i';
+                base = 8;
+                fn = (long (*)())strtol;
+                break;
+            case 'x':
+                op = 'i';
+                base = 16;
+                fn = (long (*)())strtol;
+                break;
+            case 'u':
+                op = 'i';
+                base = 10;
+                flags |= SCAN_UNSIGNED;
+                fn = (long (*)())strtoul;
+                break;
+
+            case 'f':
+            case 'e':
+                       case 'E':       
+            case 'g':
+                op = 'f';
+                break;
+
+            case 's':
+                op = 's';
+                break;
+
+            case 'c':
+                op = 's';
+                flags |= SCAN_NOSKIP;
+                               /*-cc-*/
+                               if (0 == width) {
+                                       width = 1;
+                               }
+                               /*-cc-*/
+                break;
+            case '[':
+                op = '[';
+                flags |= SCAN_NOSKIP;
+                break;
+        }   /* switch */
+
+       /*
+        * At this point, we will need additional characters from the
+        * string to proceed.
+        */
+
+        if (*string == '\0') {
+            underflow = 1;
+            goto done;
+        }
+       
+       /*
+        * Skip any leading whitespace at the beginning of a field unless
+        * the format suppresses this behavior.
+        */
+
+        if (!(flags & SCAN_NOSKIP)) {
+            while (*string != '\0') {
+                sch = *string;
+                if (! isspace((int)sch) ) {
+                    break;
+                }
+                string++;
+            }
+            if (*string == '\0') {
+                underflow = 1;
+                goto done;
+            }
+        }
+
+       /*
+        * Perform the requested scanning operation.
+        */
+       
+       switch (op) {
+               case 'c':       
+           case 's':
+               /*
+                * Scan a string up to width characters or whitespace.
+                */
+
+            if (width == 0) {
+                width = (size_t) ~0;
+            }
+            end = string;
+            while (*end != '\0') {
+                sch = *end;
+                if ( isspace( (int)sch ) ) {
+                    break;
+                }
+                end++;
+                if (--width == 0) {
+                   break;
+                }
+            }
+            if (!(flags & SCAN_SUPPRESS)) {
+                if (numVars) {
+                    current = args[objIndex++];
+                    convert_to_string( *current );
+                    ZVAL_STRINGL( *current, string, end-string, 1);
+                } else {
+                    add_index_stringl( *return_value, objIndex++, string, end-string, 1);
+                }
+            }
+            string = end;
+            break;
+
+           case '[': {
+                       CharSet cset;
+
+            if (width == 0) {
+               width = (size_t) ~0;
+               }
+            end = string;
+
+            format = BuildCharSet(&cset, format);
+            while (*end != '\0') {
+               sch = *end;
+                if (!CharInSet(&cset, (int)sch)) {
+                       break;
+                }
+                end++;
+                if (--width == 0) {
+                               break;
+                }
+            }
+            ReleaseCharSet(&cset);
+
+            if (string == end) {
+                                /*
+                               * Nothing matched the range, stop processing
+                               */
+                goto done;
+            }
+            if (!(flags & SCAN_SUPPRESS)) {
+                if (numVars) {
+                    current = args[objIndex++];
+                    convert_to_string( *current );
+                    ZVAL_STRINGL( *current, string, end-string, 1);
+                } else {
+                    add_index_stringl(*return_value, objIndex++, string, end-string, 1);
+                }
+            }
+            string = end;
+               
+            break;
+         }
+                 /*
+           case 'c':
+                  / Scan a single character./
+
+            sch = *string;
+            string++;
+            if (!(flags & SCAN_SUPPRESS)) {
+                if (numVars) {
+                    char __buf[2];
+                    __buf[0] = sch;
+                    __buf[1] = '\0';;
+                    current = args[objIndex++];
+                    convert_to_string_ex( current );
+                    ZVAL_STRINGL( *current, __buf, 1, 1);
+                } else {
+                    add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
+                }
+            }
+            break;
+               */
+           case 'i':
+               /*
+                * Scan an unsigned or signed integer.
+                */
+
+               /*-cc-*/
+               buf[0] = '\0';
+               /*-cc-*/
+            if ((width == 0) || (width > sizeof(buf) - 1)) {
+                width = sizeof(buf) - 1;
+            }
+       
+            flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
+            for (end = buf; width > 0; width--) {
+                switch (*string) {
+                /*
+                 * The 0 digit has special meaning at the beginning of
+                 * a number.  If we are unsure of the base, it
+                 * indicates that we are in base 8 or base 16 (if it is
+                 * followed by an 'x').
+                 */
+                    case '0':
+                       /*-cc-*/
+                                               if (base == 16) {
+                                                       flags |= SCAN_XOK;      
+                                               }
+                                               /*-cc-*/
+                        if (base == 0) {
+                            base = 8;
+                            flags |= SCAN_XOK;
+                        }
+                        if (flags & SCAN_NOZERO) {
+                            flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
+                        } else {
+                            flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+                        }
+                        goto addToInt;
+
+                    case '1': case '2': case '3': case '4':
+                    case '5': case '6': case '7':
+                        if (base == 0) {
+                            base = 10;
+                        }
+                        flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+                        goto addToInt;
+
+                    case '8': case '9':
+                        if (base == 0) {
+                            base = 10;
+                        }
+                        if (base <= 8) {
+                           break;
+                        }
+                        flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+                        goto addToInt;
+
+                    case 'A': case 'B': case 'C':
+                    case 'D': case 'E': case 'F':
+                    case 'a': case 'b': case 'c':
+                    case 'd': case 'e': case 'f':
+                        if (base <= 10) {
+                            break;
+                        }
+                        flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
+                        goto addToInt;
+
+                    case '+': case '-':
+                        if (flags & SCAN_SIGNOK) {
+                            flags &= ~SCAN_SIGNOK;
+                            goto addToInt;
+                        }
+                        break;
+
+                    case 'x': case 'X':
+                        if ((flags & SCAN_XOK) && (end == buf+1)) {
+                            base = 16;
+                            flags &= ~SCAN_XOK;
+                            goto addToInt;
+                        }
+                        break;
+                }
+
+                   /*
+                    * We got an illegal character so we are done accumulating.
+                    */
+
+                   break;
+
+                   addToInt:
+                   /*
+                    * Add the character to the temporary buffer.
+                    */
+                *end++ = *string++;
+                if (*string == '\0') {
+                    break;
+                }
+               }
+
+               /*
+                * Check to see if we need to back up because we only got a
+                * sign or a trailing x after a 0.
+                */
+
+               if (flags & SCAN_NODIGITS) {
+                   if (*string == '\0') {
+                underflow = 1;
+                   }
+                   goto done;
+               } else if (end[-1] == 'x' || end[-1] == 'X') {
+                   end--;
+                   string--;
+               }
+
+
+               /*
+                * Scan the value from the temporary buffer.  If we are
+                * returning a large unsigned value, we have to convert it back
+                * to a string since PHP only supports signed values.
+                */
+
+               if (!(flags & SCAN_SUPPRESS)) {
+                   *end = '\0';
+                   value = (int) (*fn)(buf, NULL, base);
+                   if ((flags & SCAN_UNSIGNED) && (value < 0)) {
+                sprintf(buf, "%u", value); /* INTL: ISO digit */
+                if (numVars) {
+                  /* change passed value type to string */
+                   current = args[objIndex++];
+                   convert_to_string( *current );
+                   ZVAL_STRING( *current, buf, 1 );
+                } else {
+                    add_index_string(*return_value, objIndex++, buf, 1);
+                }
+            } else {
+                if (numVars) {
+                    current = args[objIndex++];
+                    convert_to_long( *current );
+                    Z_LVAL(**current) = value;
+                } else {
+                    add_index_long(*return_value, objIndex++, value);
+                }
+                   }
+          }
+
+         break;
+
+           case 'f':
+               /*
+                * Scan a floating point number
+                */
+                       buf[0] = '\0';     /* call me pedantic */
+            if ((width == 0) || (width > sizeof(buf) - 1)) {
+                width = sizeof(buf) - 1;
+            }
+            flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
+            for (end = buf; width > 0; width--) {
+                switch (*string) {
+                    case '0': case '1': case '2': case '3':
+                    case '4': case '5': case '6': case '7':
+                    case '8': case '9':
+                        flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
+                        goto addToFloat;
+                    case '+': case '-':
+                        if (flags & SCAN_SIGNOK) {
+                            flags &= ~SCAN_SIGNOK;
+                            goto addToFloat;
+                        }
+                        break;
+                    case '.':
+                        if (flags & SCAN_PTOK) {
+                            flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
+                            goto addToFloat;
+                        }
+                        break;
+                    case 'e': case 'E':
+                    /*
+                     * An exponent is not allowed until there has
+                     * been at least one digit.
+                     */
+
+                        if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
+                            flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
+                                | SCAN_SIGNOK | SCAN_NODIGITS;
+                            goto addToFloat;
+                        }
+                        break;
+                }
+
+                /*
+                 * We got an illegal character so we are done accumulating.
+                 */
+
+            break;
+
+                   addToFloat:
+                   /*
+                    * Add the character to the temporary buffer.
+                    */
+
+                *end++ = *string++;
+                if (*string == '\0') {
+                    break;
+                }
+            }
+
+               /*
+                * Check to see if we need to back up because we saw a
+                * trailing 'e' or sign.
+                */
+
+               if (flags & SCAN_NODIGITS) {
+                   if (flags & SCAN_EXPOK) {
+                /*
+                 * There were no digits at all so scanning has
+                 * failed and we are done.
+                 */
+                if (*string == '\0') {
+                    underflow = 1;
+                }
+                goto done;
+            }
+
+                   /*
+                    * We got a bad exponent ('e' and maybe a sign).
+                    */
+
+                   end--;
+                   string--;
+                   if (*end != 'e' && *end != 'E') {
+                end--;
+                string--;
+                   }
+               }
+
+               /*
+                * Scan the value from the temporary buffer.
+                */
+
+               if (!(flags & SCAN_SUPPRESS)) {
+                   double dvalue;
+                   *end = '\0';
+                   dvalue = strtod(buf, NULL);
+            if (numVars) {
+                current = args[objIndex++];
+                convert_to_double( *current );
+                Z_DVAL_PP( current ) = dvalue;
+            } else {
+                add_index_double( *return_value, objIndex++, dvalue );
+            }
+               }
+               break;
+    }     /* switch (op)              */
+       nconversions++;
+   }      /*  while (*format != '\0') */
+
+   done:
+       result = SCAN_SUCCESS;
+
+    if (underflow && (0==nconversions)) {
+               scan_set_error_return( numVars, return_value );
+        result = SCAN_ERROR_EOF;
+    } else if (numVars) {
+        convert_to_long( *return_value );
+        (*return_value)->value.lval = nconversions;
+       } else if (nconversions < totalVars) {
+               /* to do : not all elements converted. we need to prune the list - cc
+                */             
+       }
+
+    return result;
+}
+
+
+/*
+ * Store the "scan failed" marker in *return_value.
+ *
+ *   numVars       non-zero: *return_value is typed IS_LONG and holds
+ *                 SCAN_ERROR_EOF; zero: *return_value is reset to NULL.
+ *   return_value  address of the zval pointer to overwrite.
+ *
+ * Kept as a function because the compiler choked on a macro version.
+ */
+inline void scan_set_error_return(int numVars,pval **return_value) {
+       if (numVars) {
+               (*return_value)->type = IS_LONG;
+               (*return_value)->value.lval = SCAN_ERROR_EOF;  /* EOF marker */
+       } else {
+               /*
+                * convert_to_null() runs the zval destructor itself, so an
+                * explicit pval_destructor() here would destroy the old
+                * value a second time.
+                */
+               convert_to_null( *return_value );
+       }
+}
+
+
diff --git a/ext/standard/scanf.h b/ext/standard/scanf.h
new file mode 100644 (file)
index 0000000..d95545d
--- /dev/null
@@ -0,0 +1,48 @@
+/* 
+   +----------------------------------------------------------------------+
+   | PHP version 4.0                                                      |
+   +----------------------------------------------------------------------+
+   | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group                   |
+   +----------------------------------------------------------------------+
+   | This source file is subject to version 2.02 of the PHP license,      |
+   | that is bundled with this package in the file LICENSE, and is        |
+   | available at through the world-wide-web at                           |
+   | http://www.php.net/license/2_02.txt.                                 |
+   | If you did not receive a copy of the PHP license and are unable to   |
+   | obtain it through the world-wide-web, please send a note to          |
+   | license@php.net so we can mail you a copy immediately.               |
+   +----------------------------------------------------------------------+
+   | Authors: clayton collie <clcollie@mindspring.com>                    |
+   +----------------------------------------------------------------------+
+ */
+#ifndef  _SCAN_H_
+#define  _SCAN_H_
+/* NOTE(review): names starting with '_' + uppercase letter are reserved in C; consider renaming the guard. */
+
+#define SCAN_MAX_ARGS   0xFF    /* Maximum number of variables which can be     */
+                                /* passed to (f|s)scanf. This is an artificial  */
+                                /* upper limit to keep resources in check and   */
+                                /* minimize the possibility of exploits         */
+
+#define SCAN_MAX_FSCANF_BUFSIZE                512  /* Max input buffer allocated for fscanf */
+#define SCAN_SUCCESS                   SUCCESS      /* alias of SUCCESS, which is defined outside this header */
+#define SCAN_ERROR_EOF                 -1      /* indicates premature termination of scan;     */
+                                                                       /* can be caused by bad parameters or a bad */
+                                                                       /* format string.                           */
+#define SCAN_ERROR_INVALID_FORMAT              (SCAN_ERROR_EOF - 1)   /* evaluates to -2 */
+#define SCAN_ERROR_VAR_PASSED_BYVAL            (SCAN_ERROR_INVALID_FORMAT - 1)   /* evaluates to -3 */
+#define SCAN_ERROR_WRONG_PARAM_COUNT   (SCAN_ERROR_VAR_PASSED_BYVAL - 1)   /* evaluates to -4 */
+#define SCAN_ERROR_INTERNAL             (SCAN_ERROR_WRONG_PARAM_COUNT - 1)   /* evaluates to -5 */
+
+
+/*  
+ * The following are exported solely for the benefit of the scanf-type
+ * functions, e.g. fscanf.
+ */
+PHPAPI int ValidateFormat(char *format, int numVars, int *totalVars);
+PHPAPI int php_sscanf_internal(char *string,char *format,int argCount,zval ***args, 
+                               int varStart,pval **return_value);
+inline void scan_set_error_return(int numVars,pval **return_value); /* NOTE(review): non-static inline prototype with no definition in this header; C99 expects one in each including file - confirm */
+
+
+#endif /* ifndef _SCAN_H_ */
index 04c682bdf930a2657f654c0fdd65f40c7d9fa615..bc9103c08834e8e018abee36d0a1fdcc8250ffdb 100644 (file)
@@ -30,6 +30,8 @@
 #ifdef HAVE_LOCALE_H
 # include <locale.h>
 #endif
+#include "scanf.h"
+#include "zend_API.h"
 #include "zend_execute.h"
 #include "php_globals.h"
 #include "basic_functions.h"
@@ -2560,7 +2562,49 @@ PHP_FUNCTION(substr_count)
 
        RETURN_LONG(count);
 }
-/* }}} */      
+/* }}} */
+
+   
+/* {{{ proto  mixed sscanf(string str,string format, ...)
+    Scans str according to format through php_sscanf_internal(); at least two arguments are required. */
+PHP_FUNCTION(sscanf)
+{
+    zval **format;      /* args[1]: the format string */
+    zval **literal;     /* args[0]: the string to scan */
+    int  result;        /* status code returned by php_sscanf_internal() */
+       zval ***args;    /* every argument the caller passed, in order */
+       int     argCount;       
+
+
+    argCount = ZEND_NUM_ARGS();
+       if (argCount < 2) {
+               WRONG_PARAM_COUNT;
+       }
+       args = (zval ***)emalloc(argCount * sizeof(zval **));
+       if (!args || (zend_get_parameters_array_ex(argCount,args) == FAILURE)) { /* arguments could not be fetched */
+               efree( args );
+               WRONG_PARAM_COUNT;
+       }
+       /* The first two arguments are the input and the format; any others are not converted here. */
+       literal = args[0];
+       format  = args[1];
+
+       convert_to_string_ex( format );
+       convert_to_string_ex( literal );
+       /* 2 is passed as varStart - presumably the index in args of the first receiving variable; confirm. */
+       result = php_sscanf_internal( (*literal)->value.str.val,
+                                                                 (*format)->value.str.val,
+                                                                 argCount,args,
+                                                                 2,&return_value);
+       efree(args);
+       /* args is released at this point; only the wrong-parameter-count status is examined below. */
+       if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
+               WRONG_PARAM_COUNT
+       }
+
+}
+/* }}} */
+
 
 /*
  * Local variables: