--- /dev/null
+Mailparse MIME parsing and manipulation functions
+Wez Furlong
--- /dev/null
+this extension is experimental,
+its functions may change their names
+or move to another extension altogether,
+so do not rely too much on them
+you have been warned!
--- /dev/null
+# $Id$
+LTLIBRARY_NAME = libmailparse.la
+ mailparse.c rfc2045.c \
+ rfc2045acchk.c rfc2045acprep.c \
+ rfc2045appendurl.c rfc2045cdecode.c rfc2045decode.c \
+ rfc2045find.c \
+ rfc822.c rfc822_getaddr.c \
+ rfc822_getaddrs.c
+include $(top_srcdir)/build/dynlib.mk
--- /dev/null
+mailparse library for PHP 4
+This library is built upon the librfc822 and librfc2045 libraries that
+originated from the maildrop component of the courier mail server.
+The copyright for most of the work belongs to Double Precision Inc.,
+although distribution of the library is carried out under the terms of the
+Example for PHP:
+$file = "/path/to/rfc822/compliant/message";
+$mime = mailparse_rfc2045_parse_file($file);
+$ostruct = mailparse_rfc2045_getstructure($mime);
+foreach($ostruct as $st) {
+ $section = mailparse_rfc2045_find($mime, $st);
+ $struct[$st] = mailparse_rfc2045_getinfo($section);
+array mailparse_rfc822_parse_addresses(string addresses)
+ parses an rfc822 compliant recipient list, such as that found in To: From:
+ headers. Returns an indexed array of assoc. arrays for each recipient:
+ array(0 => array("display" => "Wez Furlong", "address" => "wez@php.net"))
+resource mailparse_rfc2045_create()
+ Create a mime mail resource
+boolean mailparse_rfc2045_parse(resource mimemail, string data)
+ incrementally parse data into the supplied mime mail resource.
+ Concept: you can stream portions of a file at a time, rather than read
+ and parse the whole thing.
+resource mailparse_rfc2045_parse_file(string $filename)
+ Parse a file and return a $mime resource.
+ The file is opened and streamed through the parser.
+ This is the optimal way of parsing a mail file that
+ you have on disk.
+array mailparse_rfc2045_getstructure(resource mimemail)
+ returns an array containing a list of message parts in the form:
+ array("1", "1.1", "1.2")
+resource mailparse_rfc2045_find(resource mimemail, string partname)
+ returns a mime mail resource representing the named section
+array mailparse_rfc2045_getinfo(resource mimemail)
+ returns an array containing the bounds, content type and headers of the
+ section.
+mailparse_rfc2045_extract_file(resource mimemail, string filename[, string
+ callbackfunc])
+ Extracts/decodes a message section from the supplied filename.
+ If no callback func is supplied, it outputs the results into the current
+ output buffer, otherwise it calls the callback with a string parameter
+ containing the text.
+ The contents of the section will be decoded according to their transfer
+ encoding - base64, quoted-printable and uuencoded text are supported.
+All operations are done incrementally; streaming the input and output so that
+memory usage is on the whole lower than something like procmail or doing this
+stuff in PHP space. The aim is that it stays this way to handle large
+quantities of email.
+. Add support for binhex encoding?
+. Extracting a message part without decoding the transfer encoding so that
+ eg: pgp-signatures can be verified.
+. Work the other way around - build up a rfc2045 compliant message file from
+ simple structure information and filenames/variables.
--- /dev/null
+dnl $Id$
+dnl config.m4 for extension mailparse
+PHP_ARG_ENABLE(mailparse, whether to enable mailparse support,
+[ --enable-mailparse Enable mailparse support])
+if test "$PHP_MAILPARSE" != "no"; then
+ if test "$ext_shared" != "yes" && test "$enable_mbstring" != "yes"; then
+ AC_MSG_WARN(Activating mbstring)
+ enable_mbstring=yes
+ fi
+ PHP_EXTENSION(mailparse, $ext_shared)
--- /dev/null
+include $(top_builddir)/config_vars.mk
+targets = $(LTLIBRARY_NAME)
--- /dev/null
+ +----------------------------------------------------------------------+
+ | PHP version 4.0 |
+ +----------------------------------------------------------------------+
+ | Copyright (c) 1997, 1998, 1999, 2000, 2001 The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 2.02 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available at through the world-wide-web at |
+ | http://www.php.net/license/2_02.txt. |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: |
+ | Wez Furlong (wez@thebrainroom.com) |
+ | Credit also given to Double Precision Inc. who wrote the code that |
+ | the support routines for this extension were based upon. |
+ +----------------------------------------------------------------------+
+ */
+/* $Id$ */
+#include "php.h"
+#include "php_ini.h"
+#include "ext/standard/file.h"
+#include "php_mailparse.h"
+#include "mailparse_rfc822.h"
+#include "ext/standard/info.h"
+#include "ext/standard/php_output.h"
+/* just in case the config check doesn't enable mbstring automatically */
+#error The mailparse extension requires the mbstring extension!
+#include "ext/mbstring/mbfilter.h"
+static int le_rfc2045;
+/* this is for sections we "found": we mustn't free them, as this will cause
+ * a SEGFAULT when the parent is freed */
+static int le_rfc2045_nofree;
+/* Functions this extension exposes to PHP scripts; every PHP_FE entry
+ * below passes NULL as its second argument. */
+function_entry mailparse_functions[] = {
+ PHP_FE(mailparse_msg_parse_file, NULL)
+ PHP_FE(mailparse_msg_get_part, NULL)
+ PHP_FE(mailparse_msg_get_structure, NULL)
+ PHP_FE(mailparse_msg_get_part_data, NULL)
+ PHP_FE(mailparse_msg_extract_part, NULL)
+ PHP_FE(mailparse_msg_extract_part_file, NULL)
+ PHP_FE(mailparse_msg_create, NULL)
+ PHP_FE(mailparse_msg_free, NULL)
+ PHP_FE(mailparse_msg_parse, NULL)
+ PHP_FE(mailparse_rfc822_parse_addresses, NULL)
+ PHP_FE(mailparse_determine_best_xfer_encoding, NULL)
+ PHP_FE(mailparse_stream_encode, NULL)
+zend_module_entry mailparse_module_entry = {
+ "mailparse",
+ mailparse_functions,
+ PHP_MINIT(mailparse),
+ PHP_MSHUTDOWN(mailparse),
+ PHP_RINIT(mailparse),
+ PHP_RSHUTDOWN(mailparse),
+ PHP_MINFO(mailparse),
+ rfc2045_free(rsrc->ptr);
+ STD_PHP_INI_ENTRY("mailparse.def_charset", RFC2045CHARSET, PHP_INI_ALL, OnUpdateString, def_charset, zend_mailparse_globals, mailparse_globals)
+#define mailparse_msg_name "mailparse_mail_structure"
+#define mailparse_fetch_rfc2045_resource(rfcvar, zvalarg) ZEND_FETCH_RESOURCE2(rfcvar, struct rfc2045 *, zvalarg, -1, mailparse_msg_name, le_rfc2045, le_rfc2045_nofree)
+ le_rfc2045 = zend_register_list_destructors_ex(rfc2045_dtor, NULL, mailparse_msg_name, module_number);
+ le_rfc2045_nofree = zend_register_list_destructors_ex(NULL, NULL, mailparse_msg_name, module_number);
+ return SUCCESS;
+ return SUCCESS;
+ php_info_print_table_start();
+ php_info_print_table_header(2, "mailparse support", "enabled");
+ php_info_print_table_end();
+ return SUCCESS;
+ return SUCCESS;
+/* Error callback handed to mailparse_rfc822t_alloc(): reports msg and num
+ * as an E_WARNING prefixed with the name of the active PHP function. */
+static void mailparse_rfc822t_errfunc(const char * msg, int num)
+ php_error(E_WARNING, "%s(): %s %d", get_active_function_name(), msg, num);
+/* {{{ proto array mailparse_rfc822_parse_addresses(string addresses)
+ parse addresses and return a hash containing that data
+ /* Tokenises the address list, then builds one array("display" => ...,
+  * "address" => ...) per parsed address and appends it to return_value.
+  * return_value is only initialised when both the tokeniser and the
+  * address parser succeed. */
+ zval ** addresses;
+ struct rfc822t * tokens;
+ struct rfc822a * addrs;
+ int i;
+ /* exactly one argument is required; without bailing out here
+  * `addresses` would be used uninitialised below */
+ if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &addresses) == FAILURE) {
+     WRONG_PARAM_COUNT;
+ }
+ convert_to_string_ex(addresses);
+ tokens = mailparse_rfc822t_alloc(Z_STRVAL_PP(addresses), mailparse_rfc822t_errfunc);
+ if (tokens) {
+     addrs = mailparse_rfc822a_alloc(tokens);
+     if (addrs) {
+         array_init(return_value);
+         for (i = 0; i < addrs->naddrs; i++) {
+             char * p;
+             zval * item;
+             MAKE_STD_ZVAL(item);
+             if (array_init(item) == FAILURE) {
+                 /* release the zval allocated by MAKE_STD_ZVAL before
+                  * giving up, otherwise it is leaked */
+                 efree(item);
+                 break;
+             }
+             p = mailparse_rfc822_getname(addrs, i);
+             add_assoc_string(item, "display", p, 0); /* don't duplicate - getname allocated the memory for us */
+             p = mailparse_rfc822_getaddr(addrs, i);
+             add_assoc_string(item, "address", p, 0); /* don't duplicate - getaddr allocated the memory for us */
+             /* add this to the result */
+             zend_hash_next_index_insert(HASH_OF(return_value), &item, sizeof(item), NULL);
+         }
+         mailparse_rfc822a_free(addrs);
+     }
+     mailparse_rfc822t_free(tokens);
+ }
+/* }}} */
+/* {{{ proto long mailparse_determine_best_xfer_encoding(resource fp)
+ figure out the best way of encoding the content read from the file pointer fp, which must be seek-able.
+ /* Scans the whole file/stream once and picks a transfer encoding:
+  *   - starts at 7bit;
+  *   - any byte with the high bit set (>= 0x80) upgrades to 8bit;
+  *   - a NUL byte selects base64 and stops the scan;
+  *   - otherwise a line longer than 200 characters selects
+  *     quoted-printable.
+  * The handle is rewound before and after the scan. Returns the preferred
+  * MIME name of the chosen encoding, or FALSE on failure. */
+ zval ** file;
+ FILE * fp;
+ int longline = 0;
+ int linelen = 0;
+ int c;
+ enum mbfl_no_encoding bestenc = mbfl_no_encoding_7bit;
+ void * what;
+ int type;
+ char * name;
+ if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &file) == FAILURE) {
+     WRONG_PARAM_COUNT;
+ }
+ what = zend_fetch_resource(file, -1, "File-Handle", &type, 2, php_file_le_fopen(), php_file_le_stream());
+ /* a failed lookup must never reach the FILE/stream calls below */
+ if (what == NULL) {
+     RETURN_FALSE;
+ }
+ if (type == php_file_le_stream()) {
+     php_stream * stream = (php_stream*)what;
+     php_stream_rewind(stream);
+     while(!php_stream_eof(stream)) {
+         c = php_stream_getc(stream);
+         if (c == EOF)
+             break;
+         /* 0x80 itself already has the high bit set, so it is 8-bit data */
+         if (c >= 0x80)
+             bestenc = mbfl_no_encoding_8bit;
+         else if (c == 0) {
+             bestenc = mbfl_no_encoding_base64;
+             longline = 0;
+             break;
+         }
+         if (c == '\n')
+             linelen = 0;
+         else if (++linelen > 200)
+             longline = 1;
+     }
+     if (longline)
+         bestenc = mbfl_no_encoding_qprint;
+     php_stream_rewind(stream);
+ }
+ else {
+     fp = (FILE*)what;
+     rewind(fp);
+     while(!feof(fp)) {
+         c = fgetc(fp);
+         if (c == EOF)
+             break;
+         /* 0x80 itself already has the high bit set, so it is 8-bit data */
+         if (c >= 0x80)
+             bestenc = mbfl_no_encoding_8bit;
+         else if (c == 0) {
+             bestenc = mbfl_no_encoding_base64;
+             longline = 0;
+             break;
+         }
+         if (c == '\n')
+             linelen = 0;
+         else if (++linelen > 200)
+             longline = 1;
+     }
+     if (longline)
+         bestenc = mbfl_no_encoding_qprint;
+     rewind(fp);
+ }
+ name = (char *)mbfl_no2preferred_mime_name(bestenc);
+ if (name)
+ {
+     RETVAL_STRING(name, 1);
+ }
+ else
+ {
+     RETVAL_FALSE;
+ }
+/* }}} */
+/* {{{ proto boolean mailparse_stream_encode(resource sourcefp, resource destfp, string encoding)
+ stream data from source file pointer, apply encoding and write to destfp
+/* Output callback given to mbfl_convert_filter_new(): writes the single
+ * character c to the FILE * passed as fp and returns fputc()'s result. */
+static int mailparse_fp_output(int c, void * fp)
+ return fputc(c, (FILE*)fp);
+/* Flush callback given to mbfl_convert_filter_new(): flushes the FILE *
+ * passed as fp and returns fflush()'s result. */
+static int mailparse_fp_flush(void * fp)
+ return fflush((FILE*)fp);
+ /* Reads sourcefp to EOF in 2048-byte chunks, pushes every byte through an
+  * mbfl convert filter (8bit -> the named encoding) and lets the filter
+  * write its output to destfp. Returns TRUE on success, FALSE when an
+  * argument is invalid or the filter cannot be created. */
+ zval ** srcfile, ** destfile, ** encod;
+ FILE * srcfp, * destfp;
+ char * buf;
+ size_t len;
+ size_t i;
+ size_t bufsize = 2048;
+ enum mbfl_no_encoding enc;
+ mbfl_convert_filter * conv = NULL;
+ if (ZEND_NUM_ARGS() != 3 || zend_get_parameters_ex(3, &srcfile, &destfile, &encod) == FAILURE) {
+     WRONG_PARAM_COUNT;
+ }
+ if ((*srcfile)->type == IS_RESOURCE && (*srcfile)->value.lval == 0) {
+     RETURN_FALSE;
+ }
+ ZEND_FETCH_RESOURCE(srcfp, FILE *, srcfile, -1, "File-Handle", php_file_le_fopen());
+ if ((*destfile)->type == IS_RESOURCE && (*destfile)->value.lval == 0) {
+     RETURN_FALSE;
+ }
+ ZEND_FETCH_RESOURCE(destfp, FILE *, destfile, -1, "File-Handle", php_file_le_fopen());
+ convert_to_string_ex(encod);
+ /* the encoding is identified by name only; the result of this lookup
+  * must not be overwritten by a numeric conversion of the same string */
+ enc = mbfl_name2no_encoding(Z_STRVAL_PP(encod));
+ if (enc == mbfl_no_encoding_invalid) {
+     zend_error(E_WARNING, "%s(): unknown encoding \"%s\"",
+         get_active_function_name(),
+         Z_STRVAL_PP(encod)
+         );
+     RETURN_FALSE;
+ }
+ conv = mbfl_convert_filter_new(mbfl_no_encoding_8bit,
+     enc,
+     mailparse_fp_output,
+     mailparse_fp_flush,
+     destfp
+     );
+ if (conv == NULL) {
+     RETURN_FALSE;
+ }
+ buf = emalloc(bufsize);
+ while(!feof(srcfp)) {
+     len = fread(buf, sizeof(char), bufsize, srcfp);
+     /* a zero-length read without EOF is a read error: stop instead of
+      * spinning forever */
+     if (len == 0)
+         break;
+     for (i = 0; i < len; i++) {
+         /* feed the byte value 0..255; a plain char would sign-extend
+          * bytes >= 0x80 into negative ints */
+         mbfl_convert_filter_feed((unsigned char)buf[i], conv);
+     }
+ }
+ mbfl_convert_filter_flush(conv);
+ mbfl_convert_filter_delete(conv);
+ efree(buf);
+ RETVAL_TRUE;
+/* }}} */
+/* {{{ proto void mailparse_msg_parse(resource rfc2045buf, string data)
+ Incrementally parse data into buffer
+ /* Appends one chunk of message text to an existing parser resource. */
+ zval ** arg, ** data;
+ struct rfc2045 * rfcbuf;
+ /* NOTE(review): both guard branches below are empty in this view -
+  * presumably each should bail out (wrong parameter count / resource
+  * id 0); confirm against the complete file */
+ if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &arg, &data) == FAILURE) {
+ }
+ if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) {
+ }
+ /* resolves arg to its struct rfc2045; accepts both the le_rfc2045 and
+  * the le_rfc2045_nofree resource types */
+ mailparse_fetch_rfc2045_resource(rfcbuf, arg);
+ convert_to_string_ex(data);
+ /* hand the string's bytes and length to the incremental parser */
+ rfc2045_parse(rfcbuf, (*data)->value.str.val, (*data)->value.str.len);
+/* }}} */
+/* {{{ proto resource mailparse_msg_parse_file(string filename)
+ Parse file and return a resource representing the structure
+ /* Opens filename, streams it through rfc2045_parse() in
+  * MAILPARSE_BUFSIZ chunks and returns the parser as an le_rfc2045
+  * resource. Returns FALSE if the file cannot be opened or no parser
+  * could be allocated. */
+ zval ** filename;
+ struct rfc2045 * rfcbuf;
+ char * filebuf;
+ FILE * fp;
+ if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &filename) == FAILURE) {
+     WRONG_PARAM_COUNT;
+ }
+ convert_to_string_ex(filename);
+ /* open in binary mode: mailparse_msg_extract_part_file() seeks to the
+  * recorded positions in a file opened with "rb", so the bytes counted
+  * while parsing must not be altered by text-mode translation */
+ fp = VCWD_FOPEN(Z_STRVAL_PP(filename), "rb");
+ if (fp == NULL) {
+     zend_error(E_WARNING, "%s(): unable to open file %s", get_active_function_name(), Z_STRVAL_PP(filename));
+     RETURN_FALSE;
+ }
+ rfcbuf = rfc2045_alloc_ac();
+ if (rfcbuf == NULL) {
+     /* do not leak the file handle when no parser is available */
+     fclose(fp);
+     RETURN_FALSE;
+ }
+ ZEND_REGISTER_RESOURCE(return_value, rfcbuf, le_rfc2045);
+ filebuf = emalloc(MAILPARSE_BUFSIZ);
+ while(!feof(fp)) {
+     size_t got = fread(filebuf, sizeof(char), MAILPARSE_BUFSIZ, fp);
+     /* zero bytes without EOF means a read error: stop rather than loop */
+     if (got == 0)
+         break;
+     rfc2045_parse(rfcbuf, filebuf, got);
+ }
+ fclose(fp);
+ efree(filebuf);
+/* }}} */
+/* {{{ proto void mailparse_msg_free(resource rfc2045buf)
+ Frees a handle allocated by mailparse_msg_create
+ /* Removes the given message resource from the resource list. */
+ zval **arg;
+ struct rfc2045 * rfcbuf;
+ /* NOTE(review): both guard branches below are empty in this view -
+  * presumably each should bail out; confirm against the complete file */
+ if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg) == FAILURE) {
+ }
+ if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) {
+ }
+ /* only le_rfc2045 is accepted here, not le_rfc2045_nofree */
+ ZEND_FETCH_RESOURCE(rfcbuf, struct rfc2045 *, arg, -1, mailparse_msg_name, le_rfc2045);
+ zend_list_delete((*arg)->value.lval);
+/* }}} */
+/* {{{ proto long mailparse_msg_create()
+ Return a handle that can be used to parse a message
+ /* Allocates a parser with rfc2045_alloc_ac() and returns it as an
+  * le_rfc2045 resource.
+  * NOTE(review): unlike mailparse_msg_parse_file, the allocation result
+  * is not checked before registration - confirm it cannot be NULL. */
+ struct rfc2045 * rfcbuf;
+ rfcbuf = rfc2045_alloc_ac();
+ ZEND_REGISTER_RESOURCE(return_value, rfcbuf, le_rfc2045);
+/* }}} */
+/* rfc2045_decode() callback: renders the id chain of one message part as a
+ * dotted section name (e.g. "1.2.1") and appends that string to the PHP
+ * array passed through ptr. Emits a warning and appends nothing if the
+ * name does not fit into the local buffer. */
+static void get_structure_callback(struct rfc2045 *p, struct rfc2045id * id, void * ptr)
+ zval * return_value = (zval *)ptr;
+ char intbuf[16];
+ char buf[256];
+ size_t len, used = 0;
+ /* an empty id chain must still yield a valid (empty) string */
+ buf[0] = '\0';
+ while (id) {
+     sprintf(intbuf, "%d", id->idnum);
+     len = strlen(intbuf);
+     /* the sprintf below writes the digits, one separator/NUL character
+      * and the terminating NUL: len + 2 bytes in total */
+     if (len + 2 > sizeof(buf) - used) {
+         /* too many sections: bail */
+         zend_error(E_WARNING, "%s(): too many nested sections in message", get_active_function_name());
+         return;
+     }
+     sprintf(&buf[used], "%s%c", intbuf, id->next ? '.' : '\0');
+     used += len + (id->next ? 1 : 0);
+     id = id->next;
+ }
+ add_next_index_string(return_value, buf,1);
+/* {{{ proto array mailparse_msg_get_structure(resource rfc2045)
+ Returns an array of mime section names in the supplied message
+ /* Initialises return_value as an array and lets rfc2045_decode() call
+  * get_structure_callback with that array as its user pointer. */
+ zval **arg;
+ struct rfc2045 * rfcbuf;
+ /* NOTE(review): the guard branches in this function are empty in this
+  * view - presumably each should bail out; confirm against the complete
+  * file */
+ if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg) == FAILURE) {
+ }
+ if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) {
+ }
+ mailparse_fetch_rfc2045_resource(rfcbuf, arg);
+ if (array_init(return_value) == FAILURE) {
+ }
+ rfc2045_decode(rfcbuf, &get_structure_callback, return_value);
+/* }}} */
+/* callback for decoding using a "userdefined" php function */
+/* Decoder output callback: passes the n decoded bytes at p, as a single
+ * string argument, to the PHP callable in userfunc. A failed call is
+ * reported as a warning. Always returns 0. */
+static int extract_callback_user_func(const char *p, size_t n, zval *userfunc)
+ zval * retval;
+ zval * arg;
+ MAKE_STD_ZVAL(retval);
+ retval->type = IS_BOOL;
+ retval->value.lval = 0;
+ /* arg must be allocated before ZVAL_STRINGL writes through it */
+ MAKE_STD_ZVAL(arg);
+ ZVAL_STRINGL(arg, (char*)p, (int)n, 1);
+ /* TODO: use zend_is_callable */
+ if (call_user_function(EG(function_table), NULL, userfunc, retval, 1, &arg) == FAILURE)
+     zend_error(E_WARNING, "%s(): unable to call user function", get_active_function_name());
+ /* release the zval contents first, then the zval containers */
+ zval_dtor(retval);
+ zval_dtor(arg);
+ efree(retval);
+ efree(arg);
+ return 0;
+/* callback for decoding to the current output buffer: writes the n bytes
+ * at p with ZEND_WRITE; ptr is not used. Always returns 0. */
+static int extract_callback_stdout(const char *p, size_t n, void *ptr)
+ ZEND_WRITE(p, n);
+ return 0;
+/* {{{ proto void mailparse_msg_extract_part(resource rfc2045, string msgbody[, string callbackfunc])
+ Extracts/decodes a message section. If callbackfunc is not specified, the contents will be sent to "stdout".
+ /* Decodes the body of the given message part out of msgbody, which holds
+  * the message text. The decoded data goes to the user callback when one
+  * is supplied, otherwise to the output buffer. The part's body spans
+  * [body, end) as reported by rfc2045_mimepos(), clamped to the string. */
+ zval **arg, **bodystr, **cbfunc;
+ struct rfc2045 * rfcbuf;
+ off_t start, end, body;
+ off_t nlines;
+ off_t nbodylines;
+ switch(ZEND_NUM_ARGS()) {
+     case 3:
+         if (zend_get_parameters_ex(3, &arg, &bodystr, &cbfunc) == FAILURE) {
+             WRONG_PARAM_COUNT;
+         }
+         if (Z_TYPE_PP(cbfunc) != IS_ARRAY)
+             convert_to_string_ex(cbfunc);
+         break;
+     case 2:
+         if (zend_get_parameters_ex(2, &arg, &bodystr) == FAILURE) {
+             WRONG_PARAM_COUNT;
+         }
+         cbfunc = NULL;
+         break;
+     default:
+         /* any other count would leave arg/bodystr/cbfunc unset */
+         WRONG_PARAM_COUNT;
+ }
+ convert_to_string_ex(bodystr);
+ if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) {
+     RETURN_FALSE;
+ }
+ mailparse_fetch_rfc2045_resource(rfcbuf, arg);
+ rfc2045_mimepos(rfcbuf, &start, &end, &body, &nlines, &nbodylines);
+ if (cbfunc)
+     rfc2045_cdecode_start(rfcbuf, (rfc2045_decode_user_func_t)&extract_callback_user_func, *cbfunc);
+ else
+     rfc2045_cdecode_start(rfcbuf, &extract_callback_stdout, NULL);
+ /* never read past the supplied string: clamp the end position first,
+  * then derive the length from the (clamped) end and the body offset */
+ if (Z_STRLEN_PP(bodystr) < end)
+     end = Z_STRLEN_PP(bodystr);
+ if (body < 0 || body > end)
+     body = end; /* nothing of the body lies inside the string */
+ rfc2045_cdecode(rfcbuf, Z_STRVAL_PP(bodystr) + body, end - body);
+ rfc2045_cdecode_end(rfcbuf);
+/* }}} */
+/* {{{ proto string mailparse_msg_extract_part_file(resource rfc2045, string filename[, string callbackfunc])
+ Extracts/decodes a message section, decoding the transfer encoding
+ /* Decodes the body of the given message part directly from filename.
+  * The file is opened in binary mode, positioned at the part's body offset
+  * and read in MAILPARSE_BUFSIZ chunks up to the part's end offset; each
+  * chunk is passed through the transfer decoder, whose output goes to the
+  * user callback when one is supplied, otherwise to the output buffer.
+  * Returns FALSE if the file cannot be opened or positioned. */
+ zval **arg, **filename, **cbfunc;
+ struct rfc2045 * rfcbuf;
+ char * filebuf = NULL;
+ FILE * fp = NULL;
+ off_t start, end, body;
+ off_t nlines;
+ off_t nbodylines;
+ switch(ZEND_NUM_ARGS()) {
+     case 3:
+         if (zend_get_parameters_ex(3, &arg, &filename, &cbfunc) == FAILURE) {
+             WRONG_PARAM_COUNT;
+         }
+         if (Z_TYPE_PP(cbfunc) != IS_ARRAY)
+             convert_to_string_ex(cbfunc);
+         break;
+     case 2:
+         if (zend_get_parameters_ex(2, &arg, &filename) == FAILURE) {
+             WRONG_PARAM_COUNT;
+         }
+         cbfunc = NULL;
+         break;
+     default:
+         /* any other count would leave arg/filename/cbfunc unset */
+         WRONG_PARAM_COUNT;
+ }
+ convert_to_string_ex(filename);
+ if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) {
+     RETURN_FALSE;
+ }
+ mailparse_fetch_rfc2045_resource(rfcbuf, arg);
+ /* figure out where the message part starts/ends */
+ rfc2045_mimepos(rfcbuf, &start, &end, &body, &nlines, &nbodylines);
+ /* open and position the file before starting the decoder, so that every
+  * rfc2045_cdecode_start() below is paired with rfc2045_cdecode_end() */
+ fp = VCWD_FOPEN(Z_STRVAL_PP(filename), "rb");
+ if (fp == NULL) {
+     zend_error(E_WARNING, "%s(): unable to open file %s", get_active_function_name(), Z_STRVAL_PP(filename));
+     RETURN_FALSE;
+ }
+ if (fseek(fp, (long)body, SEEK_SET) == -1)
+ {
+     zend_error(E_WARNING, "%s(): unable to seek to section start", get_active_function_name());
+     fclose(fp);
+     RETURN_FALSE;
+ }
+ if (cbfunc)
+     rfc2045_cdecode_start(rfcbuf, (rfc2045_decode_user_func_t)&extract_callback_user_func, *cbfunc);
+ else
+     rfc2045_cdecode_start(rfcbuf, &extract_callback_stdout, NULL);
+ filebuf = emalloc(MAILPARSE_BUFSIZ);
+ while (body < end)
+ {
+     size_t n = MAILPARSE_BUFSIZ;
+     /* never read beyond the end of this part */
+     if ((off_t)n > end-body)
+         n=end-body;
+     n = fread(filebuf, sizeof(char), n, fp);
+     if (n == 0)
+     {
+         /* off_t is not int: print the offset through an explicit long */
+         zend_error(E_WARNING, "%s(): error reading from file \"%s\", offset %ld", get_active_function_name(), Z_STRVAL_PP(filename), (long)body);
+         break;
+     }
+     rfc2045_cdecode(rfcbuf, filebuf, n);
+     body += n;
+ }
+ rfc2045_cdecode_end(rfcbuf);
+ fclose(fp);
+ efree(filebuf);
+/* }}} */
+/* {{{ proto array mailparse_msg_get_part_data(resource rfc2045)
+ Return an assoc. array of info about the message
+/* NOTE: you may add keys to the array, but PLEASE do not remove the key/value pairs
+ that are emitted here - it will break my PHP scripts if you do! */
+ /* Fills return_value with an associative array describing one message
+  * part: "content-type" and one "content-<attr>" key per content-type
+  * attribute, one "disposition-<attr>" key per content-disposition
+  * attribute, a copy of the part's header array under "headers",
+  * "transfer-encoding", "charset", the optional content-* fields that are
+  * non-empty, and the position / line counters from rfc2045_mimepos(). */
+ zval ** arg;
+ struct rfc2045 * rfcbuf;
+ char * content_type, *transfer_encoding, *charset;
+ off_t start, end, body, nlines, nbodylines;
+ char * disposition, * disposition_name, *disposition_filename;
+ char *p;
+ struct rfc2045attr * attr;
+ zval * headers;
+ if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg) == FAILURE) {
+     WRONG_PARAM_COUNT;
+ }
+ if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) {
+     RETURN_FALSE;
+ }
+ mailparse_fetch_rfc2045_resource(rfcbuf, arg);
+ if (array_init(return_value) == FAILURE) {
+     RETURN_FALSE;
+ }
+ rfc2045_mimeinfo(rfcbuf, (const char**)&content_type, (const char**)&transfer_encoding, (const char**)&charset);
+ rfc2045_mimepos(rfcbuf, &start, &end, &body, &nlines, &nbodylines);
+ if (content_type && *content_type)
+     add_assoc_string(return_value, "content-type", content_type, 1);
+ /* get attributes for content-type */
+ attr = rfcbuf->content_type_attr;
+ while (attr != NULL) {
+     if (attr->name && attr->value) {
+         /* attribute names come from the message headers and can be of
+          * any length, so the key is sized to fit instead of being
+          * copied into a fixed stack buffer; sizeof() counts the NUL */
+         char * key = emalloc(sizeof("content-") + strlen(attr->name));
+         strcpy(key, "content-");
+         strcat(key, attr->name);
+         add_assoc_string(return_value, key, attr->value, 1);
+         efree(key);
+     }
+     attr = attr->next;
+ }
+ /* get attributes for content-disposition */
+ attr = rfcbuf->content_disposition_attr;
+ while (attr != NULL) {
+     if (attr->name && attr->value) {
+         /* sized to fit, see above */
+         char * key = emalloc(sizeof("disposition-") + strlen(attr->name));
+         strcpy(key, "disposition-");
+         strcat(key, attr->name);
+         add_assoc_string(return_value, key, attr->value, 1);
+         efree(key);
+     }
+     attr = attr->next;
+ }
+ /* get headers for this section */
+ MAKE_STD_ZVAL(headers);
+ *headers = *rfcbuf->headerhash;
+ INIT_PZVAL(headers);
+ zval_copy_ctor(headers);
+ /* add to result */
+ zend_hash_update(HASH_OF(return_value), "headers", sizeof("headers"), &headers, sizeof(headers), NULL);
+ add_assoc_string(return_value, "transfer-encoding", transfer_encoding, 1);
+ add_assoc_string(return_value, "charset", charset, 1);
+ rfc2045_dispositioninfo(rfcbuf, (const char**)&disposition, (const char**)&disposition_name, (const char**)&disposition_filename);
+ if (disposition && *disposition)
+     add_assoc_string(return_value, "content-disposition", disposition, 1);
+ if (*(p=(char*)rfc2045_content_id(rfcbuf)))
+     add_assoc_string(return_value, "content-id", p, 1);
+ if (*(p=(char*)rfc2045_content_description(rfcbuf)))
+     add_assoc_string(return_value, "content-description", p, 1);
+ if (*(p=(char*)rfc2045_content_language(rfcbuf)))
+     add_assoc_string(return_value, "content-language", p, 1);
+ if (*(p=(char*)rfc2045_content_md5(rfcbuf)))
+     add_assoc_string(return_value, "content-md5", p, 1);
+ if (*(p=(char*)rfc2045_content_base(rfcbuf))) {
+     add_assoc_string(return_value, "content-base", p, 1);
+     /* content base allocates mem */
+     efree(p);
+ }
+ add_assoc_long(return_value, "starting-pos", start);
+ add_assoc_long(return_value, "starting-pos-body", body);
+ add_assoc_long(return_value, "ending-pos", end);
+ add_assoc_long(return_value, "line-count", nlines);
+ add_assoc_long(return_value, "body-line-count", nbodylines);
+/* }}} */
+/* {{{ proto long mailparse_msg_get_part(resource rfc2045, string mimesection)
+ Return a handle on a given section in a mimemessage
+ /* Looks up the named section (e.g. "1.2") inside the given message and
+  * returns it as an le_rfc2045_nofree resource, i.e. one with no
+  * destructor, because the section is owned by its parent message.
+  * Returns FALSE, after a warning, when the section does not exist. */
+ zval ** arg, ** mimesection;
+ struct rfc2045 * rfcbuf, * newsection;
+ if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &arg, &mimesection) == FAILURE) {
+     WRONG_PARAM_COUNT;
+ }
+ if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) {
+     RETURN_FALSE;
+ }
+ mailparse_fetch_rfc2045_resource(rfcbuf, arg);
+ convert_to_string_ex(mimesection);
+ newsection = rfc2045_find(rfcbuf, (*mimesection)->value.str.val);
+ if (!newsection) {
+     php_error(E_WARNING, "%s(): cannot find section %s in message", get_active_function_name(), (*mimesection)->value.str.val);
+     /* never hand out a resource that wraps a NULL section */
+     RETURN_FALSE;
+ }
+ ZEND_REGISTER_RESOURCE(return_value, newsection, le_rfc2045_nofree);
+/* }}} */
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim: tw=78 ts=4 sw=4
+ */
--- /dev/null
+/* $Id$ */
+#ifndef mailparse_rfc822_h
+#define mailparse_rfc822_h
+** Copyright 1998 - 2000 Double Precision, Inc.
+** See COPYING for distribution information.
+#include <time.h>
+#ifdef __cplusplus
+extern "C" {
+** The text string we want to parse is first tokenized into an array of
+** struct rfc822token records. 'ptr' points into the original text
+** string, and 'len' has how many characters from 'ptr' belongs to this
+** token.
+struct rfc822token {
+ struct rfc822token *next; /* Unused by librfc822, for use by
+ ** clients */
+ int token;
+ Values for token:
+ '(' - comment
+ '"' - quoted string
+ '<', '>', '@', ',', ';', ':', '.', '[', ']', '%', '!', '=', '?', '/' - RFC atoms.
+ 0 - atom
+#define mailparse_rfc822_is_atom(p) ( (p) == 0 || (p) == '"' || (p) == '(' )
+ const char *ptr; /* Pointer to value for the token. */
+ int len; /* Length of token value */
+} ;
+** After the struct rfc822token array is built, it is used to create
+** the rfc822addr array, which is the array of addresses (plus
+** syntactical fluff) extracted from those text strings. Each rfc822addr
+** record has several possible interpretations:
+** tokens is NULL - syntactical fluff, look in name/nname for tokens
+** representing the syntactical fluff ( which is semicolons
+** and list name:
+** tokens is not NULL - actual address. The tokens representing the actual
+** address is in tokens/ntokens. If there are comments in
+** the address that are possible "real name" for the address
+** they are saved in name/nname (name may be null if there
+** is none).
+** If nname is 1, and name points to a comment token,
+** the address was specified in old-style format. Otherwise
+** the address was specified in new-style route-addr format.
+** The tokens and name pointers are set to point to the original rfc822token
+** array.
+struct rfc822addr {
+ struct rfc822token *tokens;
+ struct rfc822token *name;
+} ;
+** rfc822 tokens
+struct rfc822t {
+ struct rfc822token *tokens;
+ int ntokens;
+} ;
+struct rfc822t * mailparse_rfc822t_alloc(const char *p,
+ void (*err_func)(const char *, int)); /* Parse addresses */
+void mailparse_rfc822t_free(struct rfc822t *); /* Free rfc822 structure */
+void mailparse_rfc822tok_print(const struct rfc822token *, void (*)(char, void *), void *);
+ /* Print the tokens */
+** rfc822 addresses
+struct rfc822a {
+ struct rfc822addr *addrs;
+ int naddrs;
+} ;
+struct rfc822a * mailparse_rfc822a_alloc(struct rfc822t *);
+void mailparse_rfc822a_free(struct rfc822a *); /* Free rfc822 structure */
+void mailparse_rfc822_deladdr(struct rfc822a *, int);
+/* rfc822_print "unparses" the rfc822 structure. Each rfc822addr is "printed"
+ (via the attached function). NOTE: instead of separating addresses by
+ commas, the print_separator function is called.
+void mailparse_rfc822_print(const struct rfc822a *a,
+ void (*print_func)(char, void *),
+ void (*print_separator)(const char *, void *), void *);
+/* rfc822_print_common is an internal function */
+void mailparse_rfc822_print_common(const struct rfc822a *a,
+ char *(*decode_func)(const char *, const char *),
+ const char *chset,
+ void (*print_func)(char, void *),
+ void (*print_separator)(const char *, void *), void *);
+/* Another unparser, except that only the raw addresses are extracted,
+ and each address is followed by a newline character */
+void mailparse_rfc822_addrlist(const struct rfc822a *, void (*print_func)(char, void *),
+ void *);
+/* Now, just the comments. If comments not given, the address. */
+void mailparse_rfc822_namelist(const struct rfc822a *, void (*print_func)(char, void *),
+ void *);
+/* Unparse an individual name/addr from a list of addresses. If the given
+ index points to some syntactical fluff, this is a noop */
+void mailparse_rfc822_prname(const struct rfc822a *, int, void (*)(char, void *), void *);
+void mailparse_rfc822_praddr(const struct rfc822a *, int, void (*)(char, void *), void *);
+/* Like rfc822_prname, except that we'll also print the legacy format
+** of a list designation.
+void mailparse_rfc822_prname_orlist(const struct rfc822a *, int,
+ void (*)(char, void *), void *);
+/* Extra functions */
+char *mailparse_rfc822_gettok(const struct rfc822token *);
+char *mailparse_rfc822_getaddr(const struct rfc822a *, int);
+char *mailparse_rfc822_getname(const struct rfc822a *, int);
+char *mailparse_rfc822_getname_orlist(const struct rfc822a *, int);
+char *mailparse_rfc822_getaddrs(const struct rfc822a *);
+char *mailparse_rfc822_getaddrs_wrap(const struct rfc822a *, int);
+char *mailparse_rfc822_coresubj(const char *, int *);
+char *mailparse_rfc822_coresubj_nouc(const char *, int *);
+#ifdef __cplusplus
--- /dev/null
+ +----------------------------------------------------------------------+
+ | PHP version 4.0 |
+ +----------------------------------------------------------------------+
+ | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 2.02 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available at through the world-wide-web at |
+ | http://www.php.net/license/2_02.txt. |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: |
+ | Wez Furlong <wez@thebrainroom.com> |
+ | Credit also given to Double Precision Inc. who wrote the code that |
+ | the support routines for this extension were based upon. |
+ +----------------------------------------------------------------------+
+ */
+/* $Id$ */
+extern zend_module_entry mailparse_module_entry;
+#define phpext_mailparse_ptr &mailparse_module_entry
+#ifdef PHP_WIN32
+#define PHP_MAILPARSE_API __declspec(dllexport)
+#include "rfc2045.h"
+#include "mailparse_rfc822.h"
+#define MAILPARSE_BUFSIZ 4096
+ char * def_charset; /* default charset for use in (re)writing mail */
+#ifdef ZTS
+#define MAILPARSEG(v) (mailparse_globals->v)
+#define MAILPARSELS_FETCH() zend_mailparse_globals *mailparse_globals = ts_resource(mailparse_globals_id)
+#define MAILPARSEG(v) (mailparse_globals.v)
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim: sw=4 ts=4 tw=78
+ */
--- /dev/null
+/* $Id$ */
+ ** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for
+ ** distribution information.
+ */
+#include "php.h"
+#include "php_mailparse.h"
+#define MAXLEVELS 20
+#define MAXPARTS 300
+ New RFC2045 structure.
+ */
+struct rfc2045 *rfc2045_alloc()
+ struct rfc2045 *part;
+ /* Grab a zero-filled structure: every field starts out as 0 / NULL */
+ part = (struct rfc2045 *)emalloc(sizeof(*part));
+ memset(part, 0, sizeof(struct rfc2045));
+ /* The two scalar defaults that are not zero: parsing begins in header
+  * mode, and numbering begins at part #1 */
+ part->workinheader = 1;
+ part->pindex = 1;
+ /* headerhash gets its own freshly initialised PHP array */
+ MAKE_STD_ZVAL(part->headerhash);
+ array_init(part->headerhash);
+ return part;
+const char *rfc2045_getattr(const struct rfc2045attr *p, const char *name)
+ while (p)
+ {
+ if (p->name && strcmp(p->name, name) == 0)
+ return (p->value);
+ p=p->next;
+ }
+ return (0);
+void rfc2045_setattr(struct rfc2045attr **p, const char *name, const char *val)
+ char *v;
+ while (*p)
+ {
+ if (strcmp( (*p)->name, name) == 0) break;
+ p=&(*p)->next;
+ }
+ if (val == 0)
+ {
+ struct rfc2045attr *q= *p;
+ if (q)
+ {
+ *p=q->next;
+ if (q->name) efree(q->name);
+ if (q->value) efree(q->value);
+ efree(q);
+ }
+ return;
+ }
+ v = estrdup(val);
+ if (!*p)
+ {
+ *p = (struct rfc2045attr *)emalloc(sizeof(**p));
+ memset( (*p), 0, sizeof(**p));
+ (*p)->name = estrdup(name);
+ }
+ if ( (*p)->value )
+ efree ( (*p)->value );
+ (*p)->value=v;
+/* static const char cb_name[]="boundary"; */
+/* #define ContentBoundary(p) (rfc2045_getattr( (p)->content_type_attr, cb_name)) */
+#define ContentBoundary(p) ( (p)->boundary )
+ Unallocate the RFC2045 structure. Recursively unallocate
+ all sub-structures. Unallocate all associated buffers.
+ */
+static void rfc2045_freeattr(struct rfc2045attr *p)
+ while (p)
+ {
+ struct rfc2045attr *q=p->next;
+ if (p->name) efree(p->name);
+ if (p->value) efree(p->value);
+ efree(p);
+ p=q;
+ }
+/* Releases p: first every sub-part reachable through firstpart/next
+ * (recursively), then both attribute lists, every string buffer that is
+ * set, the headerhash array and finally the structure itself. */
+void rfc2045_free(struct rfc2045 *p)
+ struct rfc2045 *q, *r;
+ /* fetch the sibling link before the child is freed */
+ for (q=p->firstpart; q; )
+ {
+ r=q->next;
+ rfc2045_free(q);
+ q=r;
+ }
+ rfc2045_freeattr(p->content_type_attr);
+ rfc2045_freeattr(p->content_disposition_attr);
+ if (p->content_md5) efree(p->content_md5);
+ if (p->content_base) efree(p->content_base);
+ if (p->content_location) efree(p->content_location);
+ if (p->content_language) efree(p->content_language);
+ if (p->content_id) efree(p->content_id);
+ if (p->content_description) efree(p->content_description);
+ if (p->content_transfer_encoding) efree(p->content_transfer_encoding);
+ if (p->boundary) efree(p->boundary);
+ if (p->content_type) efree(p->content_type);
+ if (p->mime_version) efree(p->mime_version);
+ if (p->workbuf) efree(p->workbuf);
+ if (p->header) efree(p->header);
+ if (p->content_disposition) efree(p->content_disposition);
+ if (p->rw_transfer_encoding) efree(p->rw_transfer_encoding);
+ if (p->rfc2045acptr)
+ efree(p->rfc2045acptr);
+ /* destroy the array contents, then the zval that holds them */
+ zval_dtor(p->headerhash);
+ efree(p->headerhash);
+ efree(p);
+ Generic dynamic buffer append.
+ */
+/*
+** Append len bytes from p to a growable buffer. When the data does not fit,
+** the buffer is (re)allocated to the required size plus 256 spare bytes and
+** *bufptr / *bufsize are updated; *buflen always grows by len.
+*/
+void rfc2045_add_buf(
+	char **bufptr,	/* Buffer */
+	size_t *bufsize,	/* Buffer's maximum size */
+	size_t *buflen,	/* Buffer's current size */
+	const char *p, size_t len)	/* Append this data */
+	if (len + *buflen > *bufsize)
+	{
+		size_t grown = len + *buflen + 256;
+		char *storage;
+		if (*bufptr)
+			storage = (char *)erealloc(*bufptr, grown);
+		else
+			storage = (char *)emalloc(grown);
+		*bufptr = storage;
+		*bufsize = grown;
+	}
+	memcpy(*bufptr + *buflen, p, len);
+	*buflen += len;
+/* Append to the work buffer */
+void rfc2045_add_workbuf(struct rfc2045 *h, const char *p, size_t len)
+	/* The work buffer is just one instance of the generic growable buffer */
+	rfc2045_add_buf(&h->workbuf,
+		&h->workbufsize,
+		&h->workbuflen,
+		p, len);
+/* Append one character to the work buffer */
+void rfc2045_add_workbufch(struct rfc2045 *h, int c)
+	char byte;
+	byte = (char)c;	/* narrow to a single byte before appending */
+	rfc2045_add_workbuf(h, &byte, 1);
+ Generic function to duplicate contents of a string.
+ The destination string may already be previously allocated,
+ so unallocate it.
+ */
+/*
+** Replace the string owned by *p with a private copy of q.
+** Any previous string is released; a null q simply leaves *p cleared.
+*/
+static void set_string(char **p,
+	const char *q)
+	if (*p != 0)
+	{
+		efree(*p);
+		*p = 0;
+	}
+	if (q != 0)
+		*p = estrdup(q);
+/* Update byte counts for this structure, and all the superstructures */
+static void update_counts(struct rfc2045 *p, size_t newcnt, size_t newendcnt,
+ unsigned nlines)
+ while (p)
+ {
+ p->endpos = newcnt;
+ p->endbody = newendcnt;
+ p->nlines += nlines;
+ if (!p->workinheader)
+ p->nbodylines += nlines;
+ p=p->parent;
+ }
+ Main entry point for RFC2045 parsing. External data is fed
+ by repetitively calling rfc2045_parse().
+ rfc2045_parse() breaks up input into lines, and calls doline()
+ to process each line.
+ */
+static void doline(struct rfc2045 *);
+void rfc2045_parse(struct rfc2045 *h, const char *buf, size_t s)	/* Feed s bytes from buf to the parser rooted at h.
+	** Data is appended to h's work buffer; whenever a '\n' completes a line,
+	** doline() is run on the buffer and the buffer is emptied again. */
+ size_t l;
+ while (s)
+ {
+ for (l=0; l<s; l++)
+ if (buf[l] == '\n') break;
+ if (l < s && buf[l] == '\n')
+ {
+ ++l;	/* keep the newline as part of the buffered line */
+ rfc2045_add_workbuf(h, buf, l);
+ doline(h);
+ h->workbuflen=0;
+ }
+ else
+ rfc2045_add_workbuf(h, buf, l);	/* no newline yet: keep the partial line for the next call */
+ buf += l;
+ s -= l;
+ }
+ /*
+ ** Our buffer's getting pretty big. Let's see if we can
+ ** partially handle it.
+ */
+ if (h->workbuflen > 512)
+ {
+ struct rfc2045 *p;
+ int l, i;	/* note: this l shadows the size_t l declared above */
+ for (p=h; p->lastpart && !p->lastpart->workclosed;
+ p=p->lastpart)
+ ;	/* descend to the deepest part that is still open */
+ /* If p->workinheader, we've got a mother of all headers
+ ** here. Well, that's just too bad, we'll end up garbling
+ ** it.
+ */
+ l=h->workbuflen;
+ /* We do need to make sure that the final \r\n gets
+ ** stripped off, so don't gobble up everything if
+ ** the last character we see is a \r
+ */
+ if (h->workbuf[l-1] == '\r')
+ --l;
+ /* If we'll be rewriting, make sure rwprep knows about
+ ** stuff that was skipped just now. */
+ if (h->rfc2045acptr && !p->workinheader &&
+ (!p->lastpart || !p->lastpart->workclosed))
+ (*h->rfc2045acptr->section_contents)(h->rfc2045acptr, h->workbuf, l);
+ update_counts(p, p->endpos+l, p->endpos+l, 0);	/* the l bytes are accounted for without adding a line */
+ p->informdata=1;	/* doline() checks this flag in its look-ahead loop */
+ for (i=0; l<h->workbuflen; l++)
+ h->workbuf[i++]=h->workbuf[l];	/* move any unconsumed tail (a trailing \r) to the front */
+ h->workbuflen=i;
+ }
+ Append a new RFC2045 subpart. Adds new RFC2045 structure to the
+ end of the list of existing RFC2045 substructures.
+ */
+static struct rfc2045 *append_part_noinherit(struct rfc2045 *p, size_t startpos)
+ struct rfc2045 *newp;
+ newp=rfc2045_alloc();
+ if (p->lastpart)
+ {
+ p->lastpart->next=newp;
+ newp->pindex=p->lastpart->pindex+1;
+ }
+ else
+ {
+ p->firstpart=newp;
+ newp->pindex=0;
+ }
+ p->lastpart=newp;
+ newp->parent=p;
+ /* Initialize source pointers */
+ newp->startpos = newp->endpos = newp->startbody = newp->endbody = startpos;
+ while (p->parent)
+ p=p->parent;
+ ++p->numparts;
+ return (newp);
+static struct rfc2045 *append_part(struct rfc2045 *p, size_t startpos)
+	struct rfc2045 *child;
+	const char *parent_charset;
+	child = append_part_noinherit(p, startpos);
+	/* Substructures inherit content transfer encoding and character set */
+	set_string(&child->content_transfer_encoding,
+		p->content_transfer_encoding);
+	parent_charset = rfc2045_getattr(p->content_type_attr, "charset");
+	rfc2045_setattr(&child->content_type_attr, "charset", parent_charset);
+	return (child);
+ doline() processes next line in the RFC2045 message.
+ Drills down the list of all the multipart messages currently open,
+ and checks if the line is a boundary line for the given multipart.
+ In theory the boundary line, if there is one, should be the boundary
+ line only for the inner multipart only, but, this takes into account
+ broken MIME messages.
+ */
+static void do_header(struct rfc2045 *);
+/*
+** Process the complete line held in p's work buffer (p is the top-level
+** structure). The line is either matched against the boundary of an open
+** multipart (closing it or starting a new part), counted as body text of
+** the innermost open part, or treated as a header line of that part. An
+** empty header line ends the headers and fixes up the part's MIME defaults.
+*/
+static void doline(struct rfc2045 *p)
+ size_t cnt=p->workbuflen;
+ char *c=p->workbuf;
+ size_t n=cnt-1; /* Strip \n (we always get at least a \n here) */
+ struct rfc2045 *newp;
+ struct rfc2045ac *rwp=p->rfc2045acptr;
+ unsigned num_levels=0;
+ size_t k;
+ int bit8=0;
+ if (p->numparts > MAXPARTS)
+ {
+ p->rfcviolation |= RFC2045_ERR2COMPLEX;
+ return;
+ }
+ for (k=0; k<cnt; k++)
+ if (c[k] & 0x80) bit8=1;
+ if (n && c[n-1] == '\r') /* Strip trailing \r */
+ --n;
+ /* Before the main drill-down loop below, look ahead and see if we're
+ ** in the middle of a form-data section. */
+ for (newp=p; newp->lastpart &&
+ !newp->lastpart->workclosed; newp=newp->lastpart,
+ ++num_levels)
+ {
+ if (ContentBoundary(newp) == 0 || newp->workinheader)
+ continue;
+ if (newp->lastpart->informdata)
+ {
+ p=newp->lastpart;
+ p->informdata=0;
+ break;
+ }
+ }
+ /* Drill down until we match a boundary, or until we've reached
+ the last RFC2045 section that has been opened.
+ */
+ while (p->lastpart)
+ {
+ size_t l;
+ const char *cb;
+ if (p->lastpart->workclosed)
+ {
+ update_counts(p, p->endpos+cnt, p->endpos+cnt, 1);
+ return;
+ }
+ /* Leftover trash -- workclosed is set when the final
+ ** terminating boundary has been seen */
+ /* content_boundary may be set before the entire header
+ ** has been seen, so continue drilling down in that case
+ */
+ cb=ContentBoundary(p);
+ if (cb == 0 || p->workinheader)
+ {
+ p=p->lastpart;
+ ++num_levels;
+ continue;
+ }
+ l=strlen(cb);
+ if (c[0] == '-' && c[1] == '-' && n >= 2+l &&
+ strncasecmp(cb, c+2, l) == 0)
+ {
+ if (rwp && (!p->lastpart || !p->lastpart->isdummy))
+ (*rwp->end_section)(rwp);
+ /* Ok, we've found a boundary */
+ if (n >= 4+l && strncmp(c+2+l, "--", 2) == 0)
+ {
+ /* Last boundary */
+ p->lastpart->workclosed=1;
+ update_counts(p, p->endpos+cnt, p->endpos+cnt,
+ 1);
+ return;
+ }
+ /* Create new RFC2045 section */
+ newp=append_part(p, p->endpos+cnt);
+ update_counts(p, p->endpos+cnt, p->endpos+n, 1);
+ /* The new RFC2045 section is MIME compliant.
+ ** set_string() tolerates a null source, unlike a bare estrdup(). */
+ set_string(&newp->mime_version, p->mime_version);
+ return;
+ }
+ p=p->lastpart;
+ ++num_levels;
+ }
+ /* Ok, we've found the RFC2045 section that we're working with.
+ ** Now what?
+ */
+ if (! p->workinheader)
+ {
+ /* Processing body, just update the counts. */
+ size_t cnt_update=cnt;
+ if (bit8 && !p->content_8bit &&
+ (p->rfcviolation & RFC2045_ERR8BITCONTENT) == 0)
+ {
+ struct rfc2045 *q;
+ for (q=p; q; q=q->parent)
+ q->rfcviolation |= RFC2045_ERR8BITCONTENT;
+ }
+ /*
+ ** In multiparts, the final newline in a part belongs to the
+ ** boundary, otherwise, include it in the text.
+ */
+ if (p->parent && p->parent->content_type &&
+ strncasecmp(p->parent->content_type,
+ "multipart/", 10) == 0)
+ cnt_update=n;
+ if (!p->lastpart || !p->lastpart->workclosed)
+ {
+ if (rwp && !p->isdummy)
+ (*rwp->section_contents)(rwp, c, cnt);
+ update_counts(p, p->endpos+cnt, p->endpos+cnt_update,
+ 1);
+ }
+ return;
+ }
+ if (bit8 && (p->rfcviolation & RFC2045_ERR8BITHEADER) == 0)
+ {
+ struct rfc2045 *q;
+ for (q=p; q; q=q->parent)
+ q->rfcviolation |= RFC2045_ERR8BITHEADER;
+ }
+ /* In the header */
+ if ( n == 0 ) /* End of header, body begins. Parse header. */
+ {
+ do_header(p); /* Clean up any left over header line */
+ p->workinheader=0;
+ /* Message body starts right here */
+ p->startbody=p->endpos+cnt;
+ update_counts(p, p->startbody, p->startbody, 1);
+ --p->nbodylines; /* Don't count the blank line */
+ /* Discard content type and boundary if I don't understand this MIME flavor.
+ * Allow broken messages that omit the Mime-Version header to still be
+ * parsed.
+ */
+ if (p->mime_version == NULL && p->content_type != NULL) {
+ /* technically in violation of the spec, but there are some broken
+ * mailers out there that send this. Sadly, they are so broken
+ * they don't set X-Mailer so we can't tell what they are...
+ * Lets be useful and allow it, but flag it as a boo-boo */
+ p->mime_version = estrdup("1.0");
+ p->rfcviolation |= RFC2045_ERRNOMIMEVERSION;
+ }
+ if (!RFC2045_ISMIME1(p->mime_version))
+ {
+ set_string(&p->content_type, 0);
+ rfc2045_freeattr(p->content_type_attr);
+ p->content_type_attr=0;
+ set_string(&p->content_disposition, 0);
+ rfc2045_freeattr(p->content_disposition_attr);
+ p->content_disposition_attr=0;
+ if (p->boundary)
+ {
+ efree(p->boundary);
+ p->boundary=0;
+ }
+ }
+ /* Normally, if we don't have a content_type, default it
+ ** to text/plain. However, if the multipart type is
+ ** multipart/digest, it is message/rfc822.
+ */
+ if (RFC2045_ISMIME1(p->mime_version) && !p->content_type)
+ {
+ char *q="text/plain";
+ if (p->parent && p->parent->content_type &&
+ strcmp(p->parent->content_type,
+ "multipart/digest") == 0)
+ q="message/rfc822";
+ set_string(&p->content_type, q);
+ }
+ /* If this is not a multipart section, we don't want to
+ ** hear about any boundaries
+ */
+ if (!p->content_type ||
+ strncmp(p->content_type, "multipart/", 10))
+ {
+ rfc2045_setattr(&p->content_type_attr, "boundary", 0);
+ /* ContentBoundary() reads p->boundary, not the attribute
+ ** list, so that copy has to be dropped as well; otherwise
+ ** a non-multipart part carrying a stray boundary parameter
+ ** is still split at boundary lines. */
+ set_string(&p->boundary, 0);
+ }
+ /* If this section's a message, we will expect to see
+ ** more RFC2045 stuff, so create a nested RFC2045 structure,
+ ** and indicate that we expect to see headers.
+ */
+ if (p->content_type &&
+ strcmp(p->content_type, "message/rfc822") == 0)
+ {
+ newp=append_part_noinherit(p, p->startbody);
+ newp->workinheader=1;
+ return;
+ }
+ /*
+ ** If this is a multipart message (boundary defined),
+ ** create a RFC2045 structure for the pseudo-section
+ ** that precedes the first boundary line.
+ */
+ if (ContentBoundary(p))
+ {
+ newp=append_part(p, p->startbody);
+ newp->workinheader=0;
+ newp->isdummy=1;
+ /* It's easier just to create it. */
+ return;
+ }
+ if (rwp)
+ (*rwp->start_section)(rwp, p);
+ return;
+ }
+ /* RFC822 header continues */
+ update_counts(p, p->endpos + cnt, p->endpos+n, 1);
+ /* If this header line starts with a space, append one space
+ ** to the saved contents of the previous line, and append this
+ ** line to it.
+ */
+ if (isspace((int)(unsigned char)*c))
+ {
+ rfc2045_add_buf(&p->header, &p->headersize, &p->headerlen, " ", 1);
+ }
+ else
+ {
+ /* Otherwise the previous header line is complete, so process it */
+ do_header(p);
+ p->headerlen=0;
+ }
+ /* Save this line in the header buffer, because the next line
+ ** could be a continuation.
+ */
+ rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, c, n);
+ ** paste_tokens() - recombine an array of RFC822 tokens back as a string.
+ ** (Comments) are ignored.
+ */
+static char *paste_tokens(struct rfc822t *h, int start, int cnt)
+	int size = 1;	/* room for the terminating NUL */
+	int used = 0;
+	int idx;
+	char *out;
+	/* First pass: atoms contribute their text, other tokens one character */
+	for (idx = start; idx < start + cnt; idx++)
+	{
+		if (h->tokens[idx].token == '(')
+			continue;	/* comments are left out */
+		if (mailparse_rfc822_is_atom(h->tokens[idx].token))
+			size += h->tokens[idx].len;
+		else
+			size += 1;
+	}
+	/* Second pass: build the string */
+	out = (char *)emalloc(size);
+	for (idx = start; idx < start + cnt; idx++)
+	{
+		if (h->tokens[idx].token == '(')
+			continue;
+		if (!mailparse_rfc822_is_atom(h->tokens[idx].token))
+		{
+			out[used++] = h->tokens[idx].token;
+			continue;
+		}
+		{
+			int piece = h->tokens[idx].len;
+			memcpy(out + used, h->tokens[idx].ptr, piece);
+			used += piece;
+		}
+	}
+	out[used] = 0;
+	return (out);
+/* Various permutations of the above, including forcing the string to
+ ** lowercase
+ */
+/* Like paste_tokens(), but the resulting string is folded to lowercase. */
+static char *lower_paste_tokens(struct rfc822t *h, int start, int cnt)
+	char *p=paste_tokens(h, start, cnt);
+	char *q;
+	/* tolower() is only defined for unsigned char values and EOF, so 8-bit
+	** header bytes must not be passed as plain (possibly negative) char */
+	for (q=p; q && *q; q++)
+		*q=(char)tolower((int)(unsigned char)*q);
+	return (p);
+/* Paste the single token at index i; yields null when i is past the last token. */
+static char *paste_token(struct rfc822t *h, int i)
+	if (i < h->ntokens)
+		return (paste_tokens(h, i, 1));
+	return (0);
+static char *lower_paste_token(struct rfc822t *h, int i)
+ char *p=paste_token(h, i);
+ char *q;
+ for (q=p; q && *q; q++)
+ *q=tolower(*q);
+ return (p);
+ do_header() - process completed RFC822 header.
+ */
+static void mime_version(struct rfc2045 *, struct rfc822t *);
+static void content_type(struct rfc2045 *, struct rfc822t *);
+static void content_transfer_encoding(struct rfc2045 *, struct rfc822t *);
+static void content_disposition(struct rfc2045 *, struct rfc822t *);
+static void content_id(struct rfc2045 *, struct rfc822t *);
+static void content_description(struct rfc2045 *, const char *);
+static void content_language(struct rfc2045 *, const char *);
+static void content_md5(struct rfc2045 *, const char *);
+static void content_base(struct rfc2045 *, struct rfc822t *);
+static void content_location(struct rfc2045 *, struct rfc822t *);
+/*
+** do_header() - process the completed header line saved in p->header.
+** The line is NUL-terminated and tokenized; anything that is not of the form
+** "<atom> : ..." is dropped as a broken header. The raw value is recorded in
+** p->headerhash under the lowercased header name, and the MIME headers this
+** parser understands are handed to their dedicated handlers.
+*/
+static void do_header(struct rfc2045 *p)
+	struct rfc822t *header;
+	char *t;
+	char *val;
+	if (p->headerlen == 0) return;
+	/* 0 terminate */
+	rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, "", 1);
+	/* Parse the header line according to RFC822 */
+	header=mailparse_rfc822t_alloc(p->header, NULL);
+	if (!header) return;	/* Broken header */
+	if (header->ntokens < 2 ||
+		header->tokens[0].token ||
+		header->tokens[1].token != ':')
+	{
+		mailparse_rfc822t_free(header);
+		return;	/* Broken header */
+	}
+	t=lower_paste_token(header, 0);
+	if (t != 0) {
+		/* Raw text after the colon with leading whitespace skipped.
+		** The cast keeps isspace() defined for 8-bit header bytes. */
+		val = strchr(p->header, ':');
+		if (val) {
+			val++;
+			while (isspace((int)(unsigned char)*val))
+				val++;
+			/* add the header to the hash */
+			add_assoc_string(p->headerhash, t, val, 1);
+		}
+		if (strcmp(t, "mime-version") == 0)
+			mime_version(p, header);
+		else if (strcmp(t, "content-type") == 0)
+			content_type(p, header);
+		else if (strcmp(t, "content-transfer-encoding") == 0)
+			content_transfer_encoding(p, header);
+		else if (strcmp(t, "content-disposition") == 0)
+			content_disposition(p, header);
+		else if (strcmp(t, "content-id") == 0)
+			content_id(p, header);
+		else if (strcmp(t, "content-description") == 0)
+			content_description(p, val);	/* free-text headers take the raw value */
+		else if (strcmp(t, "content-language") == 0)
+			content_language(p, val);
+		else if (strcmp(t, "content-base") == 0)
+			content_base(p, header);
+		else if (strcmp(t, "content-location") == 0)
+			content_location(p, header);
+		else if (strcmp(t, "content-md5") == 0)
+			content_md5(p, val);
+		/* The name is only needed for the dispatch above */
+		efree(t);
+	}
+	mailparse_rfc822t_free(header);
+/* Mime-Version: and Content-Transfer-Encoding: headers are easy */
+static void mime_version(struct rfc2045 *p, struct rfc822t *header)
+	/* Everything after "<name> :" is the version text */
+	char *text = paste_tokens(header, 2, header->ntokens - 2);
+	if (text)
+	{
+		if (p->mime_version)
+			efree(p->mime_version);
+		p->mime_version = text;	/* the structure now owns the string */
+	}
+static void content_transfer_encoding(struct rfc2045 *r,
+	struct rfc822t *header)
+	char *encoding = lower_paste_tokens(header, 2, header->ntokens - 2);
+	if (encoding == 0)
+		return;
+	if (r->content_transfer_encoding)
+		efree(r->content_transfer_encoding);
+	r->content_transfer_encoding = encoding;
+	/* Remember an explicit 8bit declaration; doline() consults this flag */
+	if (strcmp(encoding, "8bit") == 0)
+		r->content_8bit = 1;
+/* Dig into the content_type header */
+static void parse_content_header(struct rfc2045 *r, struct rfc822t *header,	/* Split a "value; name=val; ..." header.
+	** init_token receives the lowercased text before the first ';' (it is not
+	** freed here); init_parameter is called once per "name=" parameter with the
+	** lowercased name and the token range [start, start+len) of its value. */
+ void (*init_token)(struct rfc2045 *, char *),
+ void (*init_parameter)(struct rfc2045 *, const char *,
+ struct rfc822t *, int, int))
+ int start;
+ int i, j;
+ char *p;
+ /* Look for the 1st ; */
+ for (start=2; start < header->ntokens; start++)	/* tokens 0 and 1 are the header name and ':' */
+ if (header->tokens[start].token == ';')
+ break;
+ /* Everything up to the 1st ; is the content type */
+ p=lower_paste_tokens(header, 2, start-2);
+ if (!p) return;
+ (*init_token)(r, p);
+ if (start < header->ntokens) start++;
+ /* Handle the remainder of the Content-Type: header */
+ while (start < header->ntokens)
+ {
+ /* Look for next ; */
+ for (i=start; i<header->ntokens; i++)
+ if (header->tokens[i].token == ';')
+ break;
+ j=start;
+ if (j < i)
+ {
+ ++j;
+ /* We only understand <atom>= */
+ while (j < i && header->tokens[j].token == '(')
+ ++j;	/* skip comment tokens between the name and '=' */
+ if (j < i && header->tokens[j].token == '=')
+ {
+ ++j;
+ p=lower_paste_token(header, start);
+ if (!p) return;
+ (*init_parameter)(r, p, header, j, i-j);
+ efree(p);	/* the parameter name is only borrowed by the callback */
+ }
+ }
+ if ( i<header->ntokens ) ++i; /* Skip over ; */
+ start=i;
+ }
+/* Dig into the content_type header */
+static void save_content_type(struct rfc2045 *, char *);
+static void save_content_type_parameter( struct rfc2045 *, const char *,
+ struct rfc822t *, int, int);
+static void content_type(struct rfc2045 *r, struct rfc822t *header)
+	/* Content-Type: uses the generic "value; name=val" header parser */
+	parse_content_header(r, header,
+		save_content_type,
+		save_content_type_parameter);
+static void save_content_type(struct rfc2045 *r, char *content_type)
+	/* Takes ownership of content_type, releasing any earlier value */
+	if (r->content_type != 0)
+		efree(r->content_type);
+	r->content_type = content_type;
+static void save_content_type_parameter(
+	struct rfc2045 *r, const char *name,
+	struct rfc822t *header, int start, int len)
+	char *value;
+	/* The charset value is stored lowercased, other values verbatim */
+	if (strcmp(name, "charset") == 0)
+		value = lower_paste_tokens(header, start, len);
+	else
+		value = paste_tokens(header, start, len);
+	if (!value)
+		return;
+	rfc2045_setattr(&r->content_type_attr, name, value);
+	efree(value);	/* rfc2045_setattr() keeps its own copy */
+	if (strcmp(name, "boundary") != 0)
+		return;
+	/* A lowercased copy of the boundary is also kept in r->boundary */
+	if (r->boundary)
+		efree(r->boundary);
+	r->boundary = lower_paste_tokens(header, start, len);
+/* Dig into content-disposition */
+static void save_content_disposition(struct rfc2045 *, char *);
+static void save_content_disposition_parameter( struct rfc2045 *, const char *,
+ struct rfc822t *, int, int);
+static void content_disposition(struct rfc2045 *r, struct rfc822t *header)
+	/* Content-Disposition: shares the generic "value; name=val" parser */
+	parse_content_header(r, header,
+		save_content_disposition,
+		save_content_disposition_parameter);
+static void save_content_disposition(struct rfc2045 *r,
+	char *content_disposition)
+	/* Takes ownership of content_disposition, releasing any earlier value */
+	if (r->content_disposition != 0)
+		efree(r->content_disposition);
+	r->content_disposition = content_disposition;
+static void save_content_disposition_parameter(
+	struct rfc2045 *r, const char *name,
+	struct rfc822t *header, int start, int len)
+	char *value = paste_tokens(header, start, len);
+	if (value)
+	{
+		rfc2045_setattr(&r->content_disposition_attr, name, value);
+		efree(value);	/* rfc2045_setattr() keeps its own copy */
+	}
+/*
+** Return the first address found in the "start" parameter of the
+** Content-Type header, or null when the parameter is missing, empty or holds
+** no address. The string comes from mailparse_rfc822_getaddr();
+** presumably the caller must free it -- verify against that function.
+*/
+char *rfc2045_related_start(const struct rfc2045 *p)
+	const char *cb=rfc2045_getattr( p->content_type_attr, "start");
+	struct rfc822t *t;
+	struct rfc822a *a;
+	char *s=0;
+	int i;
+	if (!cb || !*cb) return (0);
+	t=mailparse_rfc822t_alloc(cb, 0);
+	if (!t) return (0);	/* do_header() treats a null tokenizer result as possible too */
+	a=mailparse_rfc822a_alloc(t);
+	if (!a)
+	{
+		/* defensive: nothing to scan, but the tokens still need releasing */
+		mailparse_rfc822t_free(t);
+		return (0);
+	}
+	for (i=0; i<a->naddrs; i++)
+		if (a->addrs[i].tokens)
+		{
+			s=mailparse_rfc822_getaddr(a, i);
+			break;
+		}
+	/* Single cleanup path for both the found and the not-found case */
+	mailparse_rfc822a_free(a);
+	mailparse_rfc822t_free(t);
+	return (s);
+static void content_id(struct rfc2045 *p, struct rfc822t *t)
+	struct rfc822a *addrs = mailparse_rfc822a_alloc(t);
+	int idx = 0;
+	/* Find the first entry that carries address tokens */
+	while (idx < addrs->naddrs && !addrs->addrs[idx].tokens)
+		idx++;
+	if (idx < addrs->naddrs)
+	{
+		char *id = mailparse_rfc822_getaddr(addrs, idx);
+		if (p->content_id)
+			efree(p->content_id);
+		p->content_id = id;
+	}
+	mailparse_rfc822a_free(addrs);
+static void content_description(struct rfc2045 *p, const char *s)
+	/* Null or empty text leaves any earlier description untouched */
+	if (!s || !*s)
+		return;
+	set_string(&p->content_description, s);
+static void content_language(struct rfc2045 *p, const char *s)
+	/* Null or empty text leaves any earlier language untouched */
+	if (!s || !*s)
+		return;
+	set_string(&p->content_language, s);
+static void content_md5(struct rfc2045 *p, const char *s)
+	/* Null or empty text leaves any earlier checksum untouched */
+	if (!s || !*s)
+		return;
+	set_string(&p->content_md5, s);
+/*
+** Content-Base: store the header value with the quoting removed.
+** Quoted-string tokens are re-tagged as atoms (token 0) so that
+** paste_tokens() copies their text instead of a literal '"'.
+*/
+static void content_base(struct rfc2045 *p, struct rfc822t *t)
+	char *s;
+	int i;
+	for (i=0; i<t->ntokens; i++)
+		if (t->tokens[i].token == '"')
+			t->tokens[i].token=0;
+	s=paste_tokens(t, 2, t->ntokens-2);
+	set_string(&p->content_base, s);
+	/* set_string() stores its own copy, so the pasted buffer is released here */
+	if (s) efree(s);
+/*
+** Content-Location: store the header value with the quoting removed.
+** Quoted-string tokens are re-tagged as atoms (token 0) so that
+** paste_tokens() copies their text instead of a literal '"'.
+*/
+static void content_location(struct rfc2045 *p, struct rfc822t *t)
+	char *s;
+	int i;
+	for (i=0; i<t->ntokens; i++)
+		if (t->tokens[i].token == '"')
+			t->tokens[i].token=0;
+	s=paste_tokens(t, 2, t->ntokens-2);
+	set_string(&p->content_location, s);
+	/* set_string() stores its own copy, so the pasted buffer is released here */
+	if (s) efree(s);
+/* -------------------- */
+#define GETINFO(s, def) ( (s) && (*s) ? (s):def)
+/*
+** Report content type, transfer encoding and charset of a part, substituting
+** "text/plain", "8bit" and the configured default charset when unset.
+** The returned pointers refer to storage that is not owned by the caller.
+*/
+void rfc2045_mimeinfo(const struct rfc2045 *p,
+	const char **content_type_s,
+	const char **content_transfer_encoding_s,
+	const char **charset_s)
+	const char *charset;
+	*content_type_s = GETINFO(p->content_type, "text/plain");
+	*content_transfer_encoding_s =
+		GETINFO(p->content_transfer_encoding, "8bit");
+	charset = rfc2045_getattr(p->content_type_attr, "charset");
+	*charset_s = charset ? charset : MAILPARSEG(def_charset);
+/* Boundary parameter of the Content-Type header, or "" when there is none. */
+const char *rfc2045_boundary(const struct rfc2045 *p)
+	const char *value = rfc2045_getattr(p->content_type_attr, "boundary");
+	return (value ? value : "");
+/*
+** Report the Content-Disposition value together with its "name" and
+** "filename" parameters; each output is whatever is stored, with no
+** defaults substituted.
+*/
+void rfc2045_dispositioninfo(const struct rfc2045 *p,
+	const char **disposition_s,
+	const char **disposition_name_s,
+	const char **disposition_filename_s)
+	const struct rfc2045attr *params = p->content_disposition_attr;
+	*disposition_s = p->content_disposition;
+	*disposition_name_s = rfc2045_getattr(params, "name");
+	*disposition_filename_s = rfc2045_getattr(params, "filename");
+/* "name" parameter of the Content-Type header, or "" when there is none. */
+const char *rfc2045_contentname(const struct rfc2045 *p)
+	const char *value = rfc2045_getattr(p->content_type_attr, "name");
+	return (value ? value : "");
+const char *rfc2045_content_id(const struct rfc2045 *p)
+	/* Never returns null: an unset field reads as the empty string */
+	if (p->content_id)
+		return (p->content_id);
+	return ("");
+const char *rfc2045_content_description(const struct rfc2045 *p)
+	/* Never returns null: an unset field reads as the empty string */
+	if (p->content_description)
+		return (p->content_description);
+	return ("");
+const char *rfc2045_content_language(const struct rfc2045 *p)
+	/* Never returns null: an unset field reads as the empty string */
+	if (p->content_language)
+		return (p->content_language);
+	return ("");
+const char *rfc2045_content_md5(const struct rfc2045 *p)
+	/* Never returns null: an unset field reads as the empty string */
+	if (p->content_md5)
+		return (p->content_md5);
+	return ("");
+/*
+** Report the offsets and line counts recorded for a part. For a part that
+** has a parent, the end offset is endbody and both line counts are reduced
+** by one (when non-zero).
+*/
+void rfc2045_mimepos(const struct rfc2045 *p,
+	off_t *start_pos, off_t *end_pos, off_t *start_body,
+	off_t *nlines, off_t *nbodylines)
+	const struct rfc2045 *owner = p->parent;
+	*start_pos = p->startpos;
+	*end_pos = p->endpos;
+	*nlines = p->nlines;
+	*nbodylines = p->nbodylines;
+	if (owner != 0)	/* MIME parts do not have the trailing CRLF */
+	{
+		*end_pos = p->endbody;
+		if (*nlines != 0)
+			*nlines -= 1;
+		if (*nbodylines != 0)
+			*nbodylines -= 1;
+	}
+	*start_body = p->startbody;
+/* Number of direct sub-parts of p (nested parts are not counted). */
+unsigned rfc2045_mimepartcount(const struct rfc2045 *p)
+	unsigned total = 0;
+	const struct rfc2045 *part = p->firstpart;
+	while (part)
+	{
+		++total;
+		part = part->next;
+	}
+	return (total);
+ * vim:tw=78 sw=4 ts=4
+ * */
--- /dev/null
+** Copyright 1998 - 2000 Double Precision, Inc. See COPYING for
+** distribution information.
+** $Id$
+#ifndef rfc2045_h
+#define rfc2045_h
+#include "php_mailparse.h"
+#include "ext/mbstring/mbfilter.h"
+#define RFC2045CHARSET "us-ascii"
+#define RFC2045MIMEMSG "This is a MIME-formatted message.\n"
+#ifdef __cplusplus
+extern "C" {
+#define RFC2045_ISMIME1(p) ((p) && atoi(p) == 1)
+#define RFC2045_ISMIME1DEF(p) (!(p) || atoi(p) == 1)
+struct rfc2045;
+/* callback for de/encoding */
+typedef int (*rfc2045_decode_user_func_t)(const char *p, size_t n, void *ptr);
+typedef int (*rfc2045_decode_func_t)(struct rfc2045 * part, const char * buf, size_t n);
+/* the attributes of a given header */
+struct rfc2045attr {	/* one name/value parameter; nodes form a singly linked list */
+ struct rfc2045attr *next;	/* following attribute, or null at the end of the list */
+ char *name;	/* attribute name; released with efree() when the node is freed */
+ char *value;	/* attribute value; replaced by rfc2045_setattr() */
+struct rfc2045 {	/* one node of the parsed message tree (whole message or a sub-part) */
+ struct rfc2045 *parent;	/* enclosing part; null for the top-level structure */
+ unsigned pindex;	/* position among the parent's sub-parts, counted from 0 */
+ struct rfc2045 *next;	/* next sibling in the parent's list of sub-parts */
+ off_t startpos, /* At which offset in msg this section starts */
+ endpos, /* Where it ends */
+ startbody, /* Where the body of the msg starts */
+ endbody; /* endpos - trailing CRLF terminator */
+ off_t nlines; /* Number of lines in message */
+ off_t nbodylines; /* Number of lines only in the body */
+ char *mime_version;
+ char *content_type;
+ struct rfc2045attr *content_type_attr; /* Content-Type: attributes */
+ char *content_disposition;
+ char *boundary;	/* lowercased copy of the Content-Type boundary parameter */
+ struct rfc2045attr *content_disposition_attr;
+ char *content_transfer_encoding;
+ /* Set if content_transfer_encoding is 8bit */
+ int content_8bit;
+ char *content_id;
+ char *content_description;
+ char *content_language;
+ char *content_md5;
+ char *content_base;
+ char *content_location;
+ struct rfc2045ac *rfc2045acptr;	/* rewrite-analysis callbacks; may be null */
+ int has8bitchars; /* For rewriting */
+ int haslongline; /* For rewriting */
+ unsigned rfcviolation; /* Boo-boos */
+#define RFC2045_ERR8BITHEADER 1 /* 8 bit characters in headers */
+#define RFC2045_ERR8BITCONTENT 2 /* 8 bit contents, but no 8bit content-transfer-encoding */
+#define RFC2045_ERR2COMPLEX 4 /* Too many nested contents */
+#define RFC2045_ERRNOMIMEVERSION 8 /* missing Mime-Version header, but boundary set in content type */
+ unsigned numparts; /* # of parts allocated */
+ char *rw_transfer_encoding; /* For rewriting */
+#define RFC2045_RW_7BIT 1
+#define RFC2045_RW_8BIT 2
+ /* Subsections */
+ struct rfc2045 *firstpart, *lastpart;
+ /* Working area */
+ char *workbuf;
+ size_t workbufsize;
+ size_t workbuflen;
+ int workinheader;	/* non-zero while this part's header lines are still being read */
+ int workclosed;	/* set once the final terminating boundary has been seen */
+ int isdummy;	/* set on the pseudo-section that precedes the first boundary line */
+ int informdata; /* In a middle of a long form-data part */
+ char *header;
+ size_t headersize;
+ size_t headerlen;
+ zval * headerhash; /* a record of all of the headers */
+ /* decoding filter to use */
+ mbfl_convert_filter * decode_filter;
+ /* "user" function to accept the decoding output */
+ rfc2045_decode_user_func_t udecode_func;
+ /* this is passed as the last param to the user decode func */
+ void *misc_decode_ptr;
+} ;
+struct rfc2045 *rfc2045_alloc();
+void rfc2045_parse(struct rfc2045 *, const char *, size_t);
+void rfc2045_free(struct rfc2045 *);
+const char *rfc2045_contentname(const struct rfc2045 *);
+void rfc2045_mimeinfo(const struct rfc2045 *,
+ const char **,
+ const char **,
+ const char **);
+const char *rfc2045_boundary(const struct rfc2045 *);
+char *rfc2045_related_start(const struct rfc2045 *);
+const char *rfc2045_content_id(const struct rfc2045 *);
+const char *rfc2045_content_description(const struct rfc2045 *);
+const char *rfc2045_content_language(const struct rfc2045 *);
+const char *rfc2045_content_md5(const struct rfc2045 *);
+void rfc2045_dispositioninfo(const struct rfc2045 *,
+ const char **,
+ const char **,
+ const char **);
+void rfc2045_mimepos(const struct rfc2045 *, off_t *, off_t *, off_t *,
+ off_t *, off_t *);
+unsigned rfc2045_mimepartcount(const struct rfc2045 *);
+struct rfc2045id {	/* linked list of numeric ids handed to the rfc2045_decode() callback */
+ struct rfc2045id *next;	/* next list element, or null */
+ int idnum;	/* presumably the part number at one nesting level -- TODO confirm in rfc2045decode.c */
+} ;
+void rfc2045_decode(struct rfc2045 *,
+ void (*)(struct rfc2045 *, struct rfc2045id *, void *),
+ void *);
+struct rfc2045 *rfc2045_find(struct rfc2045 *, const char *);
+/* begin an en/decoding process */
+void rfc2045_cdecode_start(struct rfc2045 *, rfc2045_decode_user_func_t cb, void *);
+int rfc2045_cdecode(struct rfc2045 *, const char *, size_t);
+int rfc2045_cdecode_end(struct rfc2045 *);
+struct rfc2045ac {	/* callbacks and scanner state used while analysing a message for rewriting */
+ void (*start_section)(struct rfc2045ac *, struct rfc2045 *);	/* called by doline() when a part's body begins */
+ void (*section_contents)(struct rfc2045ac *, const char *, size_t);	/* receives body bytes */
+ void (*end_section)(struct rfc2045ac *);	/* called by doline() when a boundary line ends a part */
+ /* private vars used in acprep */
+ int curlinepos;	/* length of the current line, compared against 500 in do_rwprep() */
+ struct rfc2045 *currwp;	/* part whose has8bitchars/haslongline flags are being updated */
+ enum {
+ raw,
+ quotedprint,
+ qpseeneq,	/* quoted-printable: '=' has been seen */
+ qpseeneqh,	/* quoted-printable: '=' and the first hex digit have been seen */
+ base64
+ } curstate;
+ int statechar;	/* first hex digit of a quoted-printable escape */
+struct rfc2045 *rfc2045_alloc_ac();
+int rfc2045_ac_check(struct rfc2045 *, int);
+int rfc2045_rewrite(struct rfc2045 *, int, int, const char *);
+int rfc2045_rewrite_func(struct rfc2045 *p, int,
+ int (*)(const char *, int, void *), void *,
+ const char *);
+/* Internal functions */
+int rfc2045_try_boundary(struct rfc2045 *, int, const char *);
+char *rfc2045_mk_boundary(struct rfc2045 *, int);
+const char *rfc2045_getattr(const struct rfc2045attr *, const char *);
+void rfc2045_setattr(struct rfc2045attr **, const char *, const char *);
+/* MIME content base/location */
+char *rfc2045_content_base(struct rfc2045 *p);
+ /* This joins Content-Base: and Content-Location:, as best as I
+ ** can figure it out.
+ */
+char *rfc2045_append_url(const char *, const char *);
+ /* Do this with two arbitrary URLs */
+#ifdef __cplusplus
--- /dev/null
+/* $Id$ */
+** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for
+** distribution information.
+#include "php.h"
+#include "php_mailparse.h"
+/*
+** Recursively fill in missing MIME defaults (content type, charset,
+** transfer encoding, MIME version) for p and its sub-parts, and decide which
+** transfer encoding a rewrite in mode rwmode (RFC2045_RW_7BIT or
+** RFC2045_RW_8BIT) should use. Returns non-zero when rewriting is suggested.
+*/
+int rfc2045_ac_check(struct rfc2045 *p, int rwmode)
+ int flag=0; /* Flag - rewriting suggested */
+ struct rfc2045 *c;
+ int hasnon7bit=p->has8bitchars;
+ /* hasnon7bit: 8bit chars in this section or subsections */
+ const char *te;
+ int is8bitte;
+ for (c=p->firstpart; c; c=c->next)
+ if (!c->isdummy)
+ {
+ if (rfc2045_ac_check(c, rwmode)) flag=1;
+ /* The recursive call only fills in a default encoding for
+ ** MIME 1.x parts, so the field can still be null here; treat
+ ** an undeclared encoding like any other non-7bit one instead
+ ** of handing a null pointer to strcmp(). */
+ if (!c->content_transfer_encoding ||
+ (strcmp(c->content_transfer_encoding, "7bit") &&
+ strcmp(c->content_transfer_encoding, "quoted-printable")))
+ hasnon7bit=1;
+ if (c->has8bitchars)
+ p->has8bitchars=1;
+ }
+ if (RFC2045_ISMIME1DEF(p->mime_version) && !p->content_type)
+ {
+ p->content_type = estrdup("text/plain");
+ if (p->mime_version)
+ {
+ flag=1;
+ }
+ }
+ if (RFC2045_ISMIME1DEF(p->mime_version)
+ && !rfc2045_getattr(p->content_type_attr, "charset")
+ && strncasecmp(p->content_type, "text/", 5) == 0)
+ {
+ rfc2045_setattr(&p->content_type_attr, "charset",
+ MAILPARSEG(def_charset));
+ if (p->mime_version
+ && p->firstpart == 0 /* sam - don't trigger rewrites on changes to multipart headers */
+ )
+ {
+ flag=1;
+ }
+ }
+ if (RFC2045_ISMIME1DEF(p->mime_version)
+ && !p->content_transfer_encoding)
+ {
+ p->content_transfer_encoding = estrdup(hasnon7bit ? "8bit":"7bit");
+ if (p->mime_version
+ && p->firstpart == 0 /* sam - don't trigger rewrites on changes to multipart headers */
+ )
+ {
+ flag=1;
+ }
+ }
+ /* NOTE(review): the #endif matching this #if 0 is not visible in this excerpt */
+#if 0
+ if (RFC2045_ISMIME1DEF(p->mime_version)
+ && strncmp(p->content_type, "text/", 5) == 0 && !hasnon7bit
+ && strcmp(p->content_transfer_encoding, "7bit"))
+ {
+ if (p->mime_version)
+ {
+ flag=1;
+ }
+ }
+ if (RFC2045_ISMIME1DEF(p->mime_version))
+ {
+ /* Check for conversions */
+ te=p->content_transfer_encoding;
+ is8bitte=strcasecmp(te, "base64") &&
+ strcasecmp(te, "quoted-printable") &&
+ strcasecmp(te, "7bit"); /* 8 bit contents */
+ if (is8bitte && !p->has8bitchars && !p->haslongline)
+ {
+ if (p->rw_transfer_encoding)
+ efree(p->rw_transfer_encoding);
+ p->rw_transfer_encoding=estrdup("7bit");
+ flag=1;
+ is8bitte=0;
+ }
+ if (rwmode == RFC2045_RW_7BIT && (is8bitte || p->haslongline))
+ {
+ if (p->rw_transfer_encoding)
+ efree(p->rw_transfer_encoding);
+ p->rw_transfer_encoding=estrdup("quoted-printable");
+ flag=1;
+ }
+ else if (rwmode == RFC2045_RW_8BIT &&
+ strcasecmp(te, "quoted-printable") == 0 &&
+ !p->haslongline)
+ {
+ if (p->rw_transfer_encoding)
+ efree(p->rw_transfer_encoding);
+ p->rw_transfer_encoding=estrdup(hasnon7bit ? "8bit":"7bit");
+ flag=1;
+ }
+ }
+ if (!p->mime_version)
+ {
+ p->mime_version = estrdup("1.0");
+ }
+ return (flag);
--- /dev/null
+/* $Id$ */
+** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for
+** distribution information.
+#include "php.h"
+#include "php_mailparse.h"
+static void start_rwprep(struct rfc2045ac *, struct rfc2045 *);
+static void do_rwprep(struct rfc2045ac *, const char *, size_t);
+static void end_rwprep(struct rfc2045ac *);
+static struct rfc2045ac rfc2045acprep={	/* template copied into each parser by rfc2045_alloc_ac(); unlisted members start at zero */
+ &start_rwprep,	/* start_section */
+ &do_rwprep,	/* section_contents */
+ &end_rwprep};	/* end_section */
+#define h2nyb(c) ( (c) >= 'a' && (c) <= 'f' ? (c)-('a'-10): \
+ (c) >= 'A' && (c) <= 'F' ? (c)-('A'-10): (c)-'0')
+/*
+** Allocate a parser structure that also carries rewrite-analysis state:
+** a private copy of the rfc2045acprep callback table with the line position
+** and current part reset.
+*/
+struct rfc2045 *rfc2045_alloc_ac()
+	struct rfc2045 *parser = rfc2045_alloc();
+	struct rfc2045ac *state;
+	if (!parser)
+		return (parser);
+	state = emalloc(sizeof(struct rfc2045ac));
+	parser->rfc2045acptr = state;
+	memcpy(state, &rfc2045acprep, sizeof(struct rfc2045ac));
+	state->curlinepos = 0;
+	state->currwp = NULL;
+	return (parser);
+/* Begin scanning the body of part p: reset the line position and choose the
+** scanner state that matches the part's declared transfer encoding. */
+static void start_rwprep(struct rfc2045ac * this_ptr, struct rfc2045 *p)
+	const char *encoding = p->content_transfer_encoding;
+	this_ptr->currwp = p;
+	this_ptr->curlinepos = 0;
+	this_ptr->curstate = raw;	/* default when no known encoding is declared */
+	if (!encoding)
+		return;
+	if (strcmp(encoding, "quoted-printable") == 0)
+		this_ptr->curstate = quotedprint;
+	else if (strcmp(encoding, "base64") == 0)
+		this_ptr->curstate = base64;
+static void do_rwprep(struct rfc2045ac * this_ptr, const char * p, size_t n)	/* Scan n body bytes at p and set
+	** haslongline / has8bitchars on the current part. Quoted-printable
+	** escapes are decoded far enough to judge the byte they stand for. */
+ if (!this_ptr->currwp)
+ return;	/* no section has been started yet */
+ for ( ; n; --n, ++p)
+ switch (this_ptr->curstate) {	/* "continue" below advances the enclosing for loop */
+ case quotedprint:
+ if (*p == '=')
+ {
+ this_ptr->curstate = qpseeneq;
+ continue;
+ }
+ /* FALLTHRU */
+ case raw:
+ if (*p == '\r' || *p == '\n')
+ this_ptr->curlinepos = 0;
+ else if (++this_ptr->curlinepos > 500)
+ this_ptr->currwp->haslongline = 1;
+ if ((unsigned char)*p >= 127)	/* note: 127 (DEL) is flagged as well */
+ this_ptr->currwp->has8bitchars = 1;
+ break;
+ case qpseeneq:
+ if (*p == '\n')
+ {
+ this_ptr->curstate = quotedprint;	/* "=" at end of line: soft line break */
+ continue;
+ }
+ if (isspace((int)(unsigned char)*p)) continue; /* Ignore WSP */
+ this_ptr->statechar = *p;	/* remember the first hex digit */
+ this_ptr->curstate = qpseeneqh;
+ continue;
+ case qpseeneqh:
+ this_ptr->curstate = quotedprint;
+ if ( (unsigned char)
+ ( (h2nyb(this_ptr->statechar) << 4) + h2nyb(*p) ) >= 127
+ ) this_ptr->currwp->has8bitchars=1;	/* the decoded byte is judged, not the escape text */
+ if (++this_ptr->curlinepos > 500)
+ this_ptr->currwp->haslongline=1;
+ continue;
+ case base64:
+ break;	/* base64 content is not inspected */
+ }
+/* End-of-section callback of the rfc2045acprep handler.
+ * NOTE(review): no body statements are visible here; presumably the
+ * function is intentionally empty - confirm against the full file. */
+static void end_rwprep(struct rfc2045ac * this_ptr)
--- /dev/null
+ +----------------------------------------------------------------------+
+ | PHP version 4.0 |
+ +----------------------------------------------------------------------+
+ | Copyright (c) 1997, 1998, 1999, 2000, 2001 The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 2.02 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available at through the world-wide-web at |
+ | http://www.php.net/license/2_02.txt. |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: |
+ | Wez Furlong (wez@thebrainroom.com) |
+ +----------------------------------------------------------------------+
+ */
+/* $Id$ */
+** Copyright 2000 Double Precision, Inc. See COPYING for
+** distribution information.
+#include "php.h"
+#include "php_mailparse.h"
+** ---------------------------------------------------------------------
+** Attempt to parse Content-Base: and Content-Location:, and return the
+** "base" of all the relative URLs in the section.
+** ---------------------------------------------------------------------
+/* Split URL p into an access method and a path.
+ * When p starts with zero or more letters followed by ':', *method is set
+ * to p, *methodl to the length of that prefix including the ':' and *path
+ * to the character after the ':'. Otherwise (including p == NULL)
+ * *method and *methodl are cleared and *path is p itself. */
+static void get_method_path(const char *p,
+ const char **method,
+ unsigned *methodl,
+ const char **path)
+ const char *scan = p;
+ /* Skip the leading run of alphabetic characters, if any. */
+ while (scan && isalpha( (int)(unsigned char)*scan))
+ ++scan;
+ if (scan && *scan == ':')
+ {
+ *method = p;
+ *methodl = (unsigned)(scan - p) + 1;
+ *path = scan + 1;
+ return;
+ }
+ *method = 0;
+ *methodl = 0;
+ *path = p;
+/* Combine a base URL and a location into one newly emalloc()ed string.
+ * - If loc carries its own method prefix ("xxx:"), a copy of loc is
+ *   returned unchanged.
+ * - Otherwise the result is base's method, base's path, a '/' and loc,
+ *   with the base path cut back (see below) when loc starts with '/'.
+ * Either argument may be NULL and is then treated as empty. The caller
+ * owns the returned buffer. */
+char *rfc2045_append_url(const char *base, const char *loc)
+ const char *base_method;
+ unsigned base_method_l;
+ const char *base_path;
+ const char *loc_method;
+ unsigned loc_method_l;
+ const char *loc_path;
+ char *buf, *q;
+ get_method_path(base, &base_method, &base_method_l, &base_path);
+ get_method_path(loc, &loc_method, &loc_method_l, &loc_path);
+ /* loc has its own method: it replaces the base entirely. */
+ if (loc_method_l)
+ {
+ buf = emalloc(strlen(loc)+1);
+ strcpy(buf, loc);
+ return (buf);
+ }
+ /* From here on the result uses the base's method (possibly none). */
+ loc_method = base_method;
+ loc_method_l = base_method_l;
+ if (!base_path) base_path = "";
+ if (!loc_path) loc_path = "";
+ /* Room for method + base path + '/' + loc path + terminating NUL. */
+ buf = emalloc(loc_method_l + strlen(base_path)+strlen(loc_path) + 3);
+ if (loc_method_l)
+ memcpy(buf, loc_method, loc_method_l);
+ buf[loc_method_l] = 0;
+ /* q marks where the path part begins inside buf. */
+ q=buf + loc_method_l;
+ strcat(strcpy(q, base_path), "/");
+ if ( loc_path[0] == '/')
+ {
+ char *r;
+ /* loc starts with "//": drop the whole base path. */
+ if (loc_path[1] == '/') {
+ *q=0; /* Location is absolute */
+ }
+ /* Base path starts with "//": keep everything up to the next
+ * '/' after it and append loc there. */
+ else if ( q[0] == '/' && q[1] == '/' && (r=strchr(q+2, '/')) != 0) {
+ *r=0; /* Relative to top of base */
+ }
+ else {
+ *q=0; /* No sys in base, just start with / */
+ }
+ }
+ strcat(q, loc_path);
+ return (buf);
+/* Return rfc2045_append_url() applied to the section's content_base and
+ * content_location members; the result is a newly allocated string. */
+char *rfc2045_content_base(struct rfc2045 *p)
+ return (rfc2045_append_url(p->content_base, p->content_location));
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim: tw=78 ts=4 sw=4
+ */
--- /dev/null
+/* $Id$ */
+ ** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for
+ ** distribution information.
+ */
+#include "php.h"
+#include "php_mailparse.h"
+/* Output callback handed to the mbfl conversion filter: forwards one
+ * decoded byte to the user decode function of the struct rfc2045 passed
+ * in dat, and returns c unchanged. */
+static int op_func(int c, void *dat)
+ struct rfc2045 *part = (struct rfc2045 *)dat;
+ unsigned char byte = (unsigned char)c;
+ part->udecode_func(&byte, 1, part->misc_decode_ptr);
+ return c;
+/* Prepare p for content decoding.
+ * u is called with decoded data and miscptr is passed through to it.
+ * The section's Content-Transfer-Encoding name is looked up with
+ * mbfl_name2no_encoding(); an unknown name raises an E_WARNING and is
+ * treated as 8bit. For 8bit no filter is created and data is later passed
+ * straight through; otherwise a conversion filter to 8bit is created
+ * whose output is routed through op_func(). */
+void rfc2045_cdecode_start(struct rfc2045 *p,
+ rfc2045_decode_user_func_t u,
+ void *miscptr)
+ const char *cte = p->content_transfer_encoding;
+ enum mbfl_no_encoding from = mbfl_no_encoding_8bit;
+ if (cte)
+ from = mbfl_name2no_encoding(cte);
+ if (from == mbfl_no_encoding_invalid)
+ {
+ zend_error(E_WARNING, "%s(): I don't know how to decode %s transfer encoding!",
+ get_active_function_name(),
+ p->content_transfer_encoding);
+ from = mbfl_no_encoding_8bit;
+ }
+ p->misc_decode_ptr = miscptr;
+ p->udecode_func = u;
+ p->workbuflen = 0;
+ if (from != mbfl_no_encoding_8bit)
+ {
+ p->decode_filter = mbfl_convert_filter_new(
+ from, mbfl_no_encoding_8bit,
+ op_func,
+ p
+ );
+ }
+ else
+ {
+ p->decode_filter = NULL;
+ }
+/* Finish decoding: flush and destroy the conversion filter if one was
+ * created by rfc2045_cdecode_start(). Always returns 0. */
+int rfc2045_cdecode_end(struct rfc2045 *p)
+ if (!p->decode_filter)
+ return 0;
+ mbfl_convert_filter_flush(p->decode_filter);
+ mbfl_convert_filter_delete(p->decode_filter);
+ p->decode_filter = NULL;
+ return 0;
+int rfc2045_cdecode(struct rfc2045 *p, const char *s, size_t l)
+ if (s && l)
+ {
+ int i;
+ if (p->decode_filter)
+ {
+ for (i=0; i<l; i++)
+ {
+ if (mbfl_convert_filter_feed(s[i], p->decode_filter) < 0)
+ return -1;
+ }
+ }
+ else
+ return ((*p->udecode_func)(s,l,p->misc_decode_ptr));
+ }
+ return (0);
--- /dev/null
+/* $Id$ */
+** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for
+** distribution information.
+#include "php.h"
+#include "php_mailparse.h"
+/* Depth-first walk over r and its sub-parts.
+ * func is called once per visited part with the part, the head of the id
+ * chain (topid) and ptr. The chain is a linked list of idnum values, one
+ * node per nesting level; each level's node lives on that level's stack
+ * frame and is hooked into the parent's node through *childidptr, so the
+ * chain is only valid during the callback.
+ * Children are numbered from 1, except that for a part whose content
+ * type starts with "multipart/" numbering starts at 0 and the child
+ * numbered 0 is not visited. */
+static void decode(struct rfc2045id *topid,
+ struct rfc2045id **childidptr,
+ struct rfc2045 *r,
+ void (*func)(struct rfc2045 *, struct rfc2045id *, void *),
+ void *ptr)
+struct rfc2045id nextid;
+ /* Terminate the chain at this level while reporting r itself. */
+ *childidptr=0;
+ (*func)(r, topid, ptr);
+ /* Extend the chain by one level for r's children. */
+ *childidptr=&nextid;
+ nextid.idnum=1;
+ if (r->content_type && strncmp(r->content_type, "multipart/", 10) == 0)
+ nextid.idnum=0;
+ for (r=r->firstpart; r; r=r->next)
+ {
+ if (nextid.idnum)
+ decode(topid, &nextid.next, r, func, ptr);
+ ++nextid.idnum;
+ }
+/* Walk p and all of its sub-parts, calling func(part, id-chain, ptr) for
+ * each visited part. The top-level part is reported with id 1. */
+void rfc2045_decode(struct rfc2045 *p,
+ void (*func)(struct rfc2045 *, struct rfc2045id *, void *),
+ void *ptr)
+struct rfc2045id topid;
+ topid.idnum=1;
+ /* topid.next is filled in by decode() before it is read. */
+ decode(&topid, &topid.next, p, func, ptr);
--- /dev/null
+/* $Id$ */
+** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for
+** distribution information.
+#include "php.h"
+#include "php_mailparse.h"
+/* Search state shared between rfc2045_find() and its do_decode() callback. */
+struct rfc2045findstruct {
+ const char *partnum; /* dotted part number being looked for, e.g. "1.2" */
+ struct rfc2045 *ptr; /* matching part, or 0 while none has been found */
+} ;
+static void do_decode(struct rfc2045 *p, struct rfc2045id *id, void *ptr)
+ struct rfc2045findstruct *fs=(struct rfc2045findstruct *)ptr;
+ const char *partnum=fs->partnum;
+ unsigned n;
+ while (id)
+ {
+ if (!isdigit((int)(unsigned char)*partnum)) return;
+ n=0;
+ while (isdigit((int)(unsigned char)*partnum))
+ n=n*10 + *partnum++ - '0';
+ if (*partnum)
+ {
+ if (*partnum != '.') return;
+ ++partnum;
+ }
+ if (n != (unsigned)id->idnum) return;
+ id=id->next;
+ }
+ if ( *partnum == '\0') fs->ptr=p;
+/* Locate the part of p whose dotted part number equals str (for example
+ * "1.2"). Returns the matching part or 0 when there is none. */
+struct rfc2045 *rfc2045_find(struct rfc2045 *p, const char *str)
+ struct rfc2045findstruct search;
+ search.ptr = 0;
+ search.partnum = str;
+ rfc2045_decode(p, &do_decode, &search);
+ return (search.ptr);
--- /dev/null
+/* $Id$ */
+ ** Copyright 1998 - 1999 Double Precision, Inc.
+ ** See COPYING for distribution information.
+ */
+#include "php.h"
+#include "php_mailparse.h"
+static void tokenize(const char *p, struct rfc822token *tokp, int *toklen,
+ void (*err_func)(const char *, int))
+ const char *addr=p;
+ int i=0;
+ int inbracket=0;
+ *toklen=0;
+ while (*p)
+ {
+ if (isspace((int)(unsigned char)*p))
+ {
+ p++;
+ i++;
+ continue;
+ }
+ switch (*p) {
+ int level;
+ case '(':
+ if (tokp)
+ {
+ tokp->token='(';
+ tokp->ptr=p;
+ tokp->len=0;
+ }
+ level=0;
+ for (;;)
+ {
+ if (!*p)
+ {
+ if (err_func) (*err_func)(addr, i);
+ if (tokp) tokp->token='"';
+ ++*toklen;
+ return;
+ }
+ if (*p == '(')
+ ++level;
+ if (*p == ')' && --level == 0)
+ {
+ p++;
+ i++;
+ if (tokp) tokp->len++;
+ break;
+ }
+ if (*p == '\\' && p[1])
+ {
+ p++;
+ i++;
+ if (tokp) tokp->len++;
+ }
+ i++;
+ if (tokp) tokp->len++;
+ p++;
+ }
+ if (tokp) ++tokp;
+ ++*toklen;
+ continue;
+ case '"':
+ p++;
+ i++;
+ if (tokp)
+ {
+ tokp->token='"';
+ tokp->ptr=p;
+ }
+ while (*p != '"')
+ {
+ if (!*p)
+ {
+ if (err_func) (*err_func)(addr, i);
+ ++*toklen;
+ return;
+ }
+ if (*p == '\\' && p[1])
+ {
+ if (tokp) tokp->len++;
+ p++;
+ i++;
+ }
+ if (tokp) tokp->len++;
+ p++;
+ i++;
+ }
+ ++*toklen;
+ if (tokp) ++tokp;
+ p++;
+ i++;
+ continue;
+ case '\\':
+ case ')':
+ if (err_func) (*err_func)(addr, i);
+ ++p;
+ ++i;
+ continue;
+ case '<':
+ case '>':
+ case '@':
+ case ',':
+ case ';':
+ case ':':
+ case '.':
+ case '[':
+ case ']':
+ case '%':
+ case '!':
+ case '?':
+ case '=':
+ case '/':
+ if ( (*p == '<' && inbracket) ||
+ (*p == '>' && !inbracket))
+ {
+ if (err_func) (*err_func)(addr, i);
+ ++p;
+ ++i;
+ continue;
+ }
+ if (*p == '<')
+ inbracket=1;
+ if (*p == '>')
+ inbracket=0;
+ if (tokp)
+ {
+ tokp->token= *p;
+ tokp->ptr=p;
+ tokp->len=1;
+ ++tokp;
+ }
+ ++*toklen;
+ if (*p == '<' && p[1] == '>')
+ /* Fake a null address */
+ {
+ if (tokp)
+ {
+ tokp->token=0;
+ tokp->ptr="";
+ tokp->len=0;
+ ++tokp;
+ }
+ ++*toklen;
+ }
+ ++p;
+ ++i;
+ continue;
+ default:
+ if (tokp)
+ {
+ tokp->token=0;
+ tokp->ptr=p;
+ tokp->len=0;
+ }
+ while (*p && !isspace((int)(unsigned char)*p) && strchr(
+ "<>@,;:.[]()%!\"\\?=/", *p) == 0)
+ {
+ if (tokp) ++tokp->len;
+ ++p;
+ ++i;
+ }
+ if (i == 0) /* Idiot check */
+ {
+ if (err_func) (*err_func)(addr, i);
+ if (tokp)
+ {
+ tokp->token='"';
+ tokp->ptr=p;
+ tokp->len=1;
+ ++tokp;
+ }
+ ++*toklen;
+ ++p;
+ ++i;
+ continue;
+ }
+ if (tokp) ++tokp;
+ ++*toklen;
+ }
+ }
+/* Group the token array into addresses.
+ *
+ * tokens/ntokens  token array produced by tokenize().
+ * addrs           output array, or NULL to only count entries.
+ * naddrs          receives the number of entries.
+ *
+ * Each entry has a name token list and an address token list (linked
+ * through ->next). Entries with tokens == 0 are not addresses: they hold
+ * a group name ("phrase:") or a terminating ';' in name.
+ * When addrs is non-NULL the token array itself is modified in place
+ * (tokens are merged, moved and re-typed), so it cannot be parsed a
+ * second time with the same meaning afterwards. */
+static void parseaddr(struct rfc822token *tokens, int ntokens,
+ struct rfc822addr *addrs, int *naddrs)
+ int flag, j, k;
+ struct rfc822token save_token;
+ *naddrs=0;
+ while (ntokens)
+ {
+ int i;
+ /* atoms (token=0) or quoted strings, followed by a : token
+ is a list name. */
+ for (i=0; i<ntokens; i++)
+ if (tokens[i].token && tokens[i].token != '"')
+ break;
+ if (i < ntokens && tokens[i].token == ':')
+ {
+ ++i;
+ if (addrs)
+ {
+ /* Name-only entry covering the phrase and the ':'. */
+ addrs->tokens=0;
+ addrs->name=i ? tokens:0;
+ for (j=1; j<i; j++)
+ addrs->name[j-1].next=addrs->name+j;
+ if (i)
+ addrs->name[i-1].next=0;
+ addrs++;
+ }
+ ++*naddrs;
+ tokens += i;
+ ntokens -= i;
+ continue; /* Group=phrase ":" */
+ }
+ /* Spurious commas are skipped, ;s are recorded */
+ if (tokens->token == ',' || tokens->token == ';')
+ {
+ if (tokens->token == ';')
+ {
+ if (addrs)
+ {
+ addrs->tokens=0;
+ addrs->name=tokens;
+ addrs->name->next=0;
+ addrs++;
+ }
+ ++*naddrs;
+ }
+ ++tokens;
+ --ntokens;
+ continue;
+ }
+ /* If we can find a '<' before the next comma or semicolon,
+ we have new style RFC path address */
+ for (i=0; i<ntokens && tokens[i].token != ';' &&
+ tokens[i].token != ',' &&
+ tokens[i].token != '<'; i++)
+ ;
+ if (i < ntokens && tokens[i].token == '<')
+ {
+ int j;
+ /* The i tokens in front of the '<' form the display name.
+ If they consist exclusively of atoms and comments, leave
+ them alone. Else, make them all a quoted string. */
+ for (j=0; j<i && (tokens[j].token == 0 ||
+ tokens[j].token == '('); j++)
+ ;
+ if (j == i)
+ {
+ if (addrs)
+ {
+ addrs->name= i ? tokens:0;
+ for (k=1; k<i; k++)
+ addrs->name[k-1].next=addrs->name+k;
+ if (i)
+ addrs->name[i-1].next=0;
+ }
+ }
+ else /* Intentionally corrupt the original toks */
+ {
+ if (addrs)
+ {
+ /* Stretch the first token so that it spans the text of
+ all name tokens. */
+ tokens->len= tokens[i-1].ptr
+ + tokens[i-1].len
+ - tokens->ptr;
+ /* We know that all the ptrs point
+ to parts of the same string. */
+ tokens->token='"';
+ /* Quoted string. */
+ addrs->name=tokens;
+ addrs->name->next=0;
+ }
+ }
+ /* Any comments in the name part are changed to quotes */
+ if (addrs)
+ {
+ struct rfc822token *t;
+ for (t=addrs->name; t; t=t->next)
+ if (t->token == '(')
+ t->token='"';
+ }
+ /* Now that's done and over with, see what can
+ be done with the <...> part. */
+ ++i;
+ tokens += i;
+ ntokens -= i;
+ /* Everything up to the matching '>' (or the end of the input)
+ is the address proper. */
+ for (i=0; i<ntokens && tokens[i].token != '>'; i++)
+ ;
+ if (addrs)
+ {
+ addrs->tokens=i ? tokens:0;
+ for (k=1; k<i; k++)
+ addrs->tokens[k-1].next=addrs->tokens+k;
+ if (i)
+ addrs->tokens[i-1].next=0;
+ ++addrs;
+ }
+ ++*naddrs;
+ tokens += i;
+ ntokens -= i;
+ if (ntokens) /* Skip the '>' token */
+ {
+ --ntokens;
+ ++tokens;
+ }
+ continue;
+ }
+ /* Ok - old style address. Assume the worst */
+ /* Try to figure out where the address ends. It ends upon:
+ a comma, semicolon, or two consecutive atoms. */
+ flag=0;
+ for (i=0; i<ntokens && tokens[i].token != ',' &&
+ tokens[i].token != ';'; i++)
+ {
+ if (tokens[i].token == '(') continue;
+ /* Ignore comments */
+ if (tokens[i].token == 0 || tokens[i].token == '"')
+ /* Atom */
+ {
+ if (flag) break;
+ flag=1;
+ }
+ else flag=0;
+ }
+ if (i == 0) /* Must be spurious comma, or something */
+ {
+ ++tokens;
+ --ntokens;
+ continue;
+ }
+ if (addrs)
+ {
+ addrs->name=0;
+ }
+ /* Ok, now get rid of embedded comments in the address.
+ Consider the last comment to be the real name */
+ if (addrs)
+ {
+ save_token.ptr=0;
+ save_token.len=0;
+ /* Compact the non-comment tokens to the front of the range;
+ k counts them. */
+ for (j=k=0; j<i; j++)
+ {
+ if (tokens[j].token == '(')
+ {
+ save_token=tokens[j];
+ continue;
+ }
+ tokens[k]=tokens[j];
+ k++;
+ }
+ /* A comment was seen, so slot i-1 is free after compaction:
+ park the last comment there and use it as the name. */
+ if (save_token.ptr)
+ {
+ tokens[i-1]=save_token;
+ addrs->name=tokens+i-1;
+ addrs->name->next=0;
+ }
+ addrs->tokens=k ? tokens:NULL;
+ for (j=1; j<k; j++)
+ addrs->tokens[j-1].next=addrs->tokens+j;
+ if (k)
+ addrs->tokens[k-1].next=0;
+ ++addrs;
+ }
+ ++*naddrs;
+ tokens += i;
+ ntokens -= i;
+ }
+/* Emit one token through print_func, one character per call.
+ * Atoms and comments are written verbatim, special characters as
+ * themselves, and quoted strings are wrapped in double quotes again. */
+static void print_token(const struct rfc822token *token,
+ void (*print_func)(char, void *), void *ptr)
+ const char *p;
+ int n;
+ if (token->token == 0 || token->token == '(')
+ {
+ for (n=token->len, p=token->ptr; n; --n, ++p)
+ (*print_func)(*p, ptr);
+ return;
+ }
+ if (token->token != '"')
+ {
+ (*print_func)(token->token, ptr);
+ return;
+ }
+ (*print_func)('"', ptr);
+ n=token->len;
+ p=token->ptr;
+ while (n)
+ {
+ /* Escape a bare double quote and a trailing lone backslash. */
+ if (*p == '"' || (*p == '\\' && n == 1)) (*print_func)('\\', ptr);
+ /* An existing backslash escape is copied as a pair. */
+ if (*p == '\\' && n > 1)
+ {
+ (*print_func)('\\', ptr);
+ ++p;
+ --n;
+ }
+ (*print_func)(*p++, ptr);
+ --n;
+ }
+ (*print_func)('"', ptr);
+void mailparse_rfc822tok_print(const struct rfc822token *token,
+ void (*print_func)(char, void *), void *ptr)
+ int prev_isatom=0;
+ int isatom;
+ while (token)
+ {
+ isatom=mailparse_rfc822_is_atom(token->token);
+ if (prev_isatom && isatom)
+ (*print_func)(' ', ptr);
+ print_token(token, print_func, ptr);
+ prev_isatom=isatom;
+ token=token->next;
+ }
+/* Print a whole address list without decoding names: forwards to
+ * mailparse_rfc822_print_common() with no decode function and no
+ * character set. */
+void mailparse_rfc822_print(const struct rfc822a *rfcp, void (*print_func)(char, void *),
+ void (*print_separator)(const char *s, void *), void *ptr)
+ mailparse_rfc822_print_common(rfcp, 0, 0, print_func, print_separator, ptr);
+/* Print every entry of an address list.
+ *
+ * decode_func     optional; when given, names are flattened to a string,
+ *                 passed to decode_func together with chset, and the
+ *                 returned string is printed instead. The result is
+ *                 released with efree().
+ * print_func      receives the output one character at a time.
+ * print_separator receives the text placed between entries.
+ * ptr             passed through to both callbacks. */
+void mailparse_rfc822_print_common(const struct rfc822a *rfcp,
+ char *(*decode_func)(const char *, const char *), const char *chset,
+ void (*print_func)(char, void *),
+ void (*print_separator)(const char *, void *), void *ptr)
+ const struct rfc822addr *addrs=rfcp->addrs;
+ int naddrs=rfcp->naddrs;
+ while (naddrs)
+ {
+ if (addrs->tokens == 0)
+ {
+ /* Name-only entry: print it as is. */
+ mailparse_rfc822tok_print(addrs->name, print_func, ptr);
+ ++addrs;
+ --naddrs;
+ /* If more entries follow and the one just printed ends in
+ * ':' or ';', separate it from the next with a space. */
+ if (addrs[-1].name && naddrs)
+ {
+ struct rfc822token *t;
+ for (t=addrs[-1].name; t && t->next; t=t->next)
+ ;
+ if (t && (t->token == ':' || t->token == ';'))
+ (*print_separator)(" ", ptr);
+ }
+ continue;
+ }
+ else if (addrs->name && addrs->name->token == '(')
+ { /* old style */
+ /* Address first, then the comment that serves as the name. */
+ char *p;
+ mailparse_rfc822tok_print(addrs->tokens, print_func, ptr);
+ (*print_func)(' ', ptr);
+ if (decode_func && (p=mailparse_rfc822_gettok(addrs->name))!=0)
+ {
+ char *q= (*decode_func)(p, chset);
+ char *r;
+ for (r=q; r && *r; r++)
+ (*print_func)( (int)(unsigned char)*r,
+ ptr);
+ if (q) efree(q);
+ efree(p);
+ }
+ else mailparse_rfc822tok_print(addrs->name, print_func, ptr);
+ }
+ else
+ {
+ /* Name (if any) followed by the address; the address goes in
+ * angle brackets when print_braces gets set below. */
+ int print_braces=0;
+ char *p;
+ if (addrs->name)
+ {
+ if (decode_func &&
+ (p=mailparse_rfc822_gettok(addrs->name)) != 0)
+ {
+ char *q= (*decode_func)(p, chset);
+ char *r;
+ for (r=q; r && *r; r++)
+ (*print_func)(
+ (int)(unsigned char)*r,
+ ptr);
+ if (q) efree(q);
+ efree(p);
+ }
+ else mailparse_rfc822tok_print(addrs->name,
+ print_func, ptr);
+ (*print_func)(' ', ptr);
+ print_braces=1;
+ }
+ else
+ {
+ /* No name: brackets are only used when the address holds
+ * two neighbouring atom tokens. */
+ struct rfc822token *p;
+ for (p=addrs->tokens; p && p->next; p=p->next)
+ if (mailparse_rfc822_is_atom(p->token) &&
+ mailparse_rfc822_is_atom(p->next->token))
+ print_braces=1;
+ }
+ if (print_braces)
+ (*print_func)('<', ptr);
+ mailparse_rfc822tok_print(addrs->tokens, print_func, ptr);
+ if (print_braces)
+ {
+ (*print_func)('>', ptr);
+ }
+ }
+ ++addrs;
+ --naddrs;
+ /* A ", " goes in front of the next entry when it is an address
+ * or a name-only entry whose first token counts as an atom. */
+ if (naddrs)
+ if (addrs->tokens || (addrs->name &&
+ mailparse_rfc822_is_atom(addrs->name->token)))
+ (*print_separator)(", ", ptr);
+ }
+/* Release a token list: the token array (if any) and the structure
+ * itself. p must not be NULL. */
+void mailparse_rfc822t_free(struct rfc822t *p)
+ if (p->tokens) efree(p->tokens);
+ efree(p);
+/* Release an address list: the address array (if any) and the structure
+ * itself. p must not be NULL. */
+void mailparse_rfc822a_free(struct rfc822a *p)
+ if (p->addrs) efree(p->addrs);
+ efree(p);
+void mailparse_rfc822_deladdr(struct rfc822a *rfcp, int index)
+ int i;
+ if (index < 0 || index >= rfcp->naddrs) return;
+ for (i=index+1; i<rfcp->naddrs; i++)
+ rfcp->addrs[i-1]=rfcp->addrs[i];
+ if (--rfcp->naddrs == 0)
+ {
+ efree(rfcp->addrs);
+ rfcp->addrs=0;
+ }
+/* Tokenize addr into a newly allocated token list.
+ * The text is scanned twice: once to count the tokens (syntax problems
+ * are reported to err_func during this pass only) and once to fill the
+ * zero-initialised token array. Tokens point into addr, so addr must
+ * outlive the returned list. Returns NULL if an allocation yields NULL;
+ * release the result with mailparse_rfc822t_free(). */
+struct rfc822t *mailparse_rfc822t_alloc(const char *addr,
+ void (*err_func)(const char *, int))
+ struct rfc822t *list = (struct rfc822t *)emalloc(sizeof(struct rfc822t));
+ if (list == NULL)
+ return (NULL);
+ memset(list, 0, sizeof(*list));
+ /* Pass 1: count only. */
+ tokenize(addr, NULL, &list->ntokens, err_func);
+ if (list->ntokens)
+ {
+ list->tokens = (struct rfc822token *)
+ ecalloc(list->ntokens, sizeof(struct rfc822token));
+ if (list->tokens == NULL)
+ {
+ mailparse_rfc822t_free(list);
+ return (NULL);
+ }
+ }
+ else
+ list->tokens = 0;
+ /* Pass 2: store the tokens; errors were already reported. */
+ tokenize(addr, list->tokens, &list->ntokens, NULL);
+ return (list);
+/* Build an address list from a token list.
+ * parseaddr() runs twice: first to count the entries, then to fill the
+ * zero-initialised address array. The second pass links and rewrites the
+ * tokens of t in place and the entries point into them, so t must outlive
+ * the returned list. Returns NULL if an allocation yields NULL; release
+ * the result with mailparse_rfc822a_free(). */
+struct rfc822a *mailparse_rfc822a_alloc(struct rfc822t *t)
+ struct rfc822a *list = (struct rfc822a *)emalloc(sizeof(struct rfc822a));
+ if (list == NULL)
+ return (NULL);
+ memset(list, 0, sizeof(*list));
+ /* Pass 1: count only. */
+ parseaddr(t->tokens, t->ntokens, NULL, &list->naddrs);
+ if (list->naddrs)
+ {
+ list->addrs = (struct rfc822addr *)
+ ecalloc(list->naddrs, sizeof(struct rfc822addr));
+ if (list->addrs == NULL)
+ {
+ mailparse_rfc822a_free(list);
+ return (NULL);
+ }
+ }
+ else
+ list->addrs = 0;
+ /* Pass 2: fill in the entries. */
+ parseaddr(t->tokens, t->ntokens, list->addrs, &list->naddrs);
+ return (list);
+/* Print the address part of entry number index followed by a newline.
+ * Nothing is printed for an out-of-range index or for an entry that has
+ * no address tokens. */
+void mailparse_rfc822_praddr(const struct rfc822a *rfcp, int index,
+ void (*print_func)(char, void *), void *ptr)
+ const struct rfc822addr *entry;
+ if (index < 0 || index >= rfcp->naddrs)
+ return;
+ entry = &rfcp->addrs[index];
+ if (!entry->tokens)
+ return;
+ mailparse_rfc822tok_print(entry->tokens, print_func, ptr);
+ (*print_func)('\n', ptr);
+/* Print the address of every entry, one per line, in list order. */
+void mailparse_rfc822_addrlist(const struct rfc822a *rfcp,
+ void (*print_func)(char, void *), void *ptr)
+ int idx = 0;
+ while (idx < rfcp->naddrs)
+ {
+ mailparse_rfc822_praddr(rfcp, idx, print_func, ptr);
+ ++idx;
+ }
+/* Print the display name of entry number index followed by a newline.
+ * Out-of-range indexes and entries without address tokens print nothing;
+ * everything else is delegated to mailparse_rfc822_prname_orlist(). */
+void mailparse_rfc822_prname(const struct rfc822a *rfcp, int index,
+ void (*print_func)(char, void *), void *ptr)
+ if (index >= 0 && index < rfcp->naddrs && rfcp->addrs[index].tokens)
+ mailparse_rfc822_prname_orlist(rfcp, index, print_func, ptr);
+/* Print the name of entry number index followed by a newline; unlike
+ * mailparse_rfc822_prname() this also handles entries that have no
+ * address tokens. When the entry has no name at all its address tokens
+ * are printed instead. An out-of-range index prints nothing. */
+void mailparse_rfc822_prname_orlist(const struct rfc822a *rfcp, int index,
+ void (*print_func)(char, void *), void *ptr)
+ const struct rfc822addr *addrs;
+ if (index < 0 || index >= rfcp->naddrs) return;
+ addrs=rfcp->addrs+index;
+ if (addrs->name)
+ {
+ struct rfc822token *i;
+ int n;
+ int prev_isatom=0;
+ int isatom=0;
+ for (i=addrs->name; i; i=i->next, prev_isatom=isatom)
+ {
+ isatom=mailparse_rfc822_is_atom(i->token);
+ /* Neighbouring atoms are separated by one space. */
+ if (isatom && prev_isatom)
+ (*print_func)(' ', ptr);
+ if (i->token != '(')
+ {
+ print_token(i, print_func, ptr);
+ continue;
+ }
+ /* Comment token: print its text without the first and the
+ * last character (the enclosing parentheses). */
+ for (n=2; n<i->len; n++)
+ (*print_func)(i->ptr[n-1], ptr);
+ }
+ } else
+ mailparse_rfc822tok_print(addrs->tokens, print_func, ptr);
+ (*print_func)('\n', ptr);
+/* Print the display name of every entry, one per line, in list order. */
+void mailparse_rfc822_namelist(const struct rfc822a *rfcp,
+ void (*print_func)(char, void *), void *ptr)
+ int idx = 0;
+ while (idx < rfcp->naddrs)
+ {
+ mailparse_rfc822_prname(rfcp, idx, print_func, ptr);
+ ++idx;
+ }
--- /dev/null
+/* $Id$ */
+ ** Copyright 1998 - 1999 Double Precision, Inc.
+ ** See COPYING for distribution information.
+ */
+#include "php.h"
+#include "php_mailparse.h"
+/* Print callback that counts characters: bumps the size_t that p points
+ * to for every character except newline. */
+static void cntlen(char c, void *p)
+ size_t *total = (size_t *)p;
+ if (c == '\n')
+ return;
+ *total += 1;
+/* Print callback that stores characters: p points to a char * write
+ * cursor; every character except newline is written there and the
+ * cursor advanced. */
+static void saveaddr(char c, void *p)
+ char **cursor;
+ if (c == '\n')
+ return;
+ cursor = (char **)p;
+ **cursor = c;
+ ++*cursor;
+/* Return the address of entry number n as a newly allocated string
+ * (empty when the index is out of range or the entry has no address).
+ * The text is produced twice by mailparse_rfc822_praddr(): once to
+ * measure it and once to store it; the trailing newline is dropped by
+ * the callbacks. The caller frees the result with efree(). */
+char *mailparse_rfc822_getaddr(const struct rfc822a *rfc, int n)
+ size_t needed = 0;
+ char *text;
+ char *fill;
+ mailparse_rfc822_praddr(rfc, n, &cntlen, &needed);
+ text = emalloc(needed+1);
+ if (!text)
+ return (0);
+ fill = text;
+ mailparse_rfc822_praddr(rfc, n, &saveaddr, &fill);
+ text[needed] = 0;
+ return (text);
+/* Return the display name of entry number n as a newly allocated string
+ * with every double quote character removed. Uses
+ * mailparse_rfc822_prname(), so entries without an address yield an
+ * empty string. The caller frees the result with efree(). */
+char *mailparse_rfc822_getname(const struct rfc822a *rfc, int n)
+ size_t needed = 0;
+ char *text, *fill;
+ char *src, *dst;
+ /* Measure, allocate, then render into the buffer. */
+ mailparse_rfc822_prname(rfc, n, &cntlen, &needed);
+ text = emalloc(needed+1);
+ if (!text)
+ return (0);
+ fill = text;
+ mailparse_rfc822_prname(rfc, n, &saveaddr, &fill);
+ text[needed] = 0;
+ /* Squeeze out the double quotes in place. */
+ dst = text;
+ for (src = text; *src; src++)
+ {
+ if (*src == '"')
+ continue;
+ *dst++ = *src;
+ }
+ *dst = 0;
+ return (text);
+/* Like mailparse_rfc822_getname(), but based on
+ * mailparse_rfc822_prname_orlist(), so name-only entries are rendered
+ * too. Every double quote character is removed from the result, which
+ * the caller frees with efree(). */
+char *mailparse_rfc822_getname_orlist(const struct rfc822a *rfc, int n)
+ size_t needed = 0;
+ char *text, *fill;
+ char *src, *dst;
+ /* Measure, allocate, then render into the buffer. */
+ mailparse_rfc822_prname_orlist(rfc, n, &cntlen, &needed);
+ text = emalloc(needed+1);
+ if (!text)
+ return (0);
+ fill = text;
+ mailparse_rfc822_prname_orlist(rfc, n, &saveaddr, &fill);
+ text[needed] = 0;
+ /* Squeeze out the double quotes in place. */
+ dst = text;
+ for (src = text; *src; src++)
+ {
+ if (*src == '"')
+ continue;
+ *dst++ = *src;
+ }
+ *dst = 0;
+ return (text);
+/* Render the token list t into a newly allocated string using
+ * mailparse_rfc822tok_print(); newline characters are not stored. The
+ * caller frees the result with efree(). */
+char *mailparse_rfc822_gettok(const struct rfc822token *t)
+ size_t needed = 0;
+ char *text;
+ char *fill;
+ mailparse_rfc822tok_print(t, &cntlen, &needed);
+ text = emalloc(needed+1);
+ if (!text)
+ return (0);
+ fill = text;
+ mailparse_rfc822tok_print(t, &saveaddr, &fill);
+ text[needed] = 0;
+ return (text);
--- /dev/null
+/* $Id$ */
+** Copyright 1998 - 1999 Double Precision, Inc.
+** See COPYING for distribution information.
+#include "php.h"
+#include "php_mailparse.h"
+/* Print callback that counts characters: bumps the size_t that p points
+ * to once per call, whatever the character is. */
+static void cntlen(char c, void *p)
+ size_t *total = (size_t *)p;
+ (void)c; /* the character itself is irrelevant */
+ *total += 1;
+static void cntlensep(const char *p, void *ptr)
+ while (*p) cntlen(*p++, ptr);
+/* Print callback that stores characters: ptr points to a char * write
+ * cursor; c is written there and the cursor advanced. */
+static void saveaddr(char c, void *ptr)
+ char **cursor = (char **)ptr;
+ **cursor = c;
+ ++*cursor;
+static void saveaddrsep(const char *p, void *ptr)
+ while (*p) saveaddr(*p++, ptr);
+/* Render the complete address list into one newly allocated string as
+ * produced by mailparse_rfc822_print(). The list is printed twice, first
+ * to measure and then to store. The caller frees the result with
+ * efree(). */
+char *mailparse_rfc822_getaddrs(const struct rfc822a *rfc)
+size_t needed = 0;
+char *text;
+char *fill;
+ mailparse_rfc822_print(rfc, &cntlen, &cntlensep, &needed);
+ text = emalloc(needed+1);
+ if (!text)
+ return (0);
+ fill = text;
+ mailparse_rfc822_print(rfc, &saveaddr, &saveaddrsep, &fill);
+ text[needed] = 0;
+ return (text);
+static void saveaddrsep_wrap(const char *p, void *ptr)
+int c;
+ while ((c=*p++) != 0)
+ {
+ if (c == ' ') c='\n';
+ saveaddr(c, ptr);
+ }
+/* Render the complete address list into one newly allocated string,
+ * wrapped at roughly w characters per line.
+ * The list is first stored with the spaces of every separator turned
+ * into newlines (saveaddrsep_wrap), then the loop below turns newlines
+ * back into spaces as long as the resulting line stays shorter than w.
+ * The caller frees the result with efree(). */
+char *mailparse_rfc822_getaddrs_wrap(const struct rfc822a *rfc, int w)
+size_t addrbuflen=0;
+char *addrbuf, *ptr, *start, *lastnl;
+ mailparse_rfc822_print(rfc, &cntlen, &cntlensep, &addrbuflen);
+ if (!(addrbuf=emalloc(addrbuflen+1)))
+ return (0);
+ ptr=addrbuf;
+ mailparse_rfc822_print(rfc, &saveaddr, &saveaddrsep_wrap, &ptr);
+ addrbuf[addrbuflen]=0;
+ /* start  - beginning of the output line being assembled
+ * ptr    - scan position
+ * lastnl - most recent break that may still become a space, or 0 */
+ for (lastnl=0, start=ptr=addrbuf; *ptr; )
+ {
+ /* Advance to the next break (newline or end of string). */
+ while (*ptr && *ptr != '\n') ptr++;
+ if (ptr-start < w)
+ {
+ /* Still fits: join at the previous break and remember this
+ * one as the new candidate. */
+ if (lastnl) *lastnl=' ';
+ lastnl=ptr;
+ if (*ptr) ++ptr;
+ }
+ else
+ {
+ /* Too long: keep the previous break as a real newline and
+ * measure again from there (ptr is not advanced)... */
+ if (lastnl)
+ start=lastnl+1;
+ else
+ {
+ /* ...or, with no earlier break, accept the over-long
+ * piece and start the next line after this break. */
+ start=ptr+1;
+ if (*ptr) ++ptr;
+ }
+ lastnl=0;
+ }
+ }
+ return (addrbuf);
--- /dev/null
+Check for mailparse presence
+<?php if (!extension_loaded("mailparse")) print "skip"; ?>
+echo "mailparse extension is available";
+mailparse extension is available
--- /dev/null
+Check stream encoding
+<?php if (!extension_loaded("mailparse")) print "skip"; ?>
+$text = <<<EOD
+hello, this is some text=hello.
+$fp = tmpfile();
+fwrite($fp, $text);
+$dest = tmpfile();
+mailparse_stream_encode($fp, $dest, "quoted-printable");
+$data = fread($dest, 2048);
+echo $data;
+hello, this is some text=3Dhello.
--- /dev/null
+ * This is a simple email viewer.
+ * make sure that $filename points to a file containing an email message and
+ * load this page in your browser.
+ * You will be able to choose a part to view.
+ * */
+#$filename = "/home/CLIENTWEB/worlddo/mimetests/namib.rfc822";
+$filename = "/home/CLIENTWEB/worlddo/mimetests/uumsg";
+#$filename = "/home/CLIENTWEB/worlddo/mimetests/segblob.txt";
+#$filename = "yourmessage.txt";
+/* parse the message and return a mime message resource */
+$mime = mailparse_msg_parse_file($filename);
+/* return an array of message parts - this consists of the names of the parts
+ * only */
+$struct = mailparse_msg_get_structure($mime);
+echo "<table>\n";
+/* print a choice of sections */
+foreach($struct as $st) {
+ echo "<tr>\n";
+ echo "<td><a href=\"$PHP_SELF?showpart=$st\">$st</a></td>\n";
+ /* get a handle on the message resource for a subsection */
+ $section = mailparse_msg_get_part($mime, $st);
+ /* get content-type, encoding and header information for that section */
+ $info = mailparse_msg_get_part_data($section);
+ echo "\n";
+ echo "<td>" . $info["content-type"] . "</td>\n";
+ echo "<td>" . $info["content-disposition"] . "</td>\n";
+ echo "<td>" . $info["disposition-filename"] . "</td>\n";
+ echo "<td>" . $info["charset"] . "</td>\n";
+ echo "</tr>";
+echo "</table>";
+/* if we were called to display a part, do so now */
+if ($showpart) {
+ /* get a handle on the message resource for the desired part */
+ $sec = mailparse_msg_get_part($mime, $showpart);
+ echo "<table border=1><tr><th>Section $showpart</th></tr><tr><td>";
+ ob_start();
+ /* extract the part from the message file and dump it to the output buffer
+ * */
+ mailparse_msg_extract_part_file($sec, $filename);
+ $contents = ob_get_contents();
+ ob_end_clean();
+ /* quote the message for safe display in a browser */
+ echo nl2br(htmlentities($contents)) . "</td></tr></table>";;