From: Wez Furlong Date: Sun, 20 May 2001 11:11:28 +0000 (+0000) Subject: Imported mailparse extension X-Git-Tag: PRE_GRANULAR_GARBAGE_FIX~330 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6468f8b492040c8e973608fa41c5c64a6e2434b2;p=php Imported mailparse extension @- New mailparse extension for parsing and manipulating MIME mail (Wez) --- diff --git a/ext/mailparse/CREDITS b/ext/mailparse/CREDITS new file mode 100644 index 0000000000..02afd3368b --- /dev/null +++ b/ext/mailparse/CREDITS @@ -0,0 +1,2 @@ +Mailparse MIME parsing and manipulation functions +Wez Furlong diff --git a/ext/mailparse/EXPERIMENTAL b/ext/mailparse/EXPERIMENTAL new file mode 100644 index 0000000000..6443e99646 --- /dev/null +++ b/ext/mailparse/EXPERIMENTAL @@ -0,0 +1,5 @@ +this extension is experimental, +its functions may change their names +or move to extension altogether +so do not rely too much on them +you have been warned! diff --git a/ext/mailparse/Makefile.in b/ext/mailparse/Makefile.in new file mode 100644 index 0000000000..72e38c7492 --- /dev/null +++ b/ext/mailparse/Makefile.in @@ -0,0 +1,15 @@ +# $Id$ + +LTLIBRARY_NAME = libmailparse.la +LTLIBRARY_SOURCES = \ + mailparse.c rfc2045.c \ + rfc2045acchk.c rfc2045acprep.c \ + rfc2045appendurl.c rfc2045cdecode.c rfc2045decode.c \ + rfc2045find.c \ + rfc822.c rfc822_getaddr.c \ + rfc822_getaddrs.c + +LTLIBRARY_SHARED_NAME = mailparse.la +LTLIBRARY_SHARED_LIBADD = $(MAILPARSE_SHARED_LIBADD) + +include $(top_srcdir)/build/dynlib.mk diff --git a/ext/mailparse/README b/ext/mailparse/README new file mode 100644 index 0000000000..8a3ee3b215 --- /dev/null +++ b/ext/mailparse/README @@ -0,0 +1,79 @@ +mailparse library for PHP 4 +=========================== + +This library is built upon the librfc822 and librfc2045 libraries that +originated from the maildrop component of the courier mail server. 
+The copyright for most of the work belongs to Double Precision Inc., +although distribution of the library is carried out under the terms of the +GPL. + +Example for PHP: +============= + +$file = "/path/to/rfc822/compliant/message"; + +$mime = mailparse_msg_parse_file($file); +$ostruct = mailparse_msg_get_structure($mime); +foreach($ostruct as $st) { + $section = mailparse_msg_get_part($mime, $st); + $struct[$st] = mailparse_msg_get_part_data($section); +} +var_dump($struct); + +array mailparse_rfc822_parse_addresses(string addresses) + parses an rfc822 compliant recipient list, such as that found in To: From: + headers. Returns an indexed array of assoc. arrays for each recipient: + array(0 => array("display" => "Wez Furlong", "address" => "wez@php.net")) + +resource mailparse_msg_create() + Create a mime mail resource + +boolean mailparse_msg_parse(resource mimemail, string data) + incrementally parse data into the supplied mime mail resource. + Concept: you can stream portions of a file at a time, rather than read + and parse the whole thing. + + +resource mailparse_msg_parse_file(string $filename) + Parse a file and return a $mime resource. + The file is opened and streamed through the parser. + This is the optimal way of parsing a mail file that + you have on disk. + + +array mailparse_msg_get_structure(resource mimemail) + returns an array containing a list of message parts in the form: + array("1", "1.1", "1.2") + +resource mailparse_msg_get_part(resource mimemail, string partname) + returns a mime mail resource representing the named section + +array mailparse_msg_get_part_data(resource mimemail) + returns an array containing the bounds, content type and headers of the + section. + +mailparse_msg_extract_part_file(resource mimemail, string filename[, string + callbackfunc]) + Extracts/decodes a message section from the supplied filename. 
+ If no callback func is supplied, it outputs the results into the current + output buffer, otherwise it calls the callback with a string parameter + containing the text. + The contents of the section will be decoded according to their transfer + encoding - base64, quoted-printable and uuencoded text are supported. + +All operations are done incrementally; streaming the input and output so that +memory usage is on the whole lower than something like procmail or doing this +stuff in PHP space. The aim is that it stays this way to handle large +quantities of email. + +TODO: +===== + +. Add support for binhex encoding? +. Extracting a message part without decoding the transfer encoding so that + eg: pgp-signatures can be verified. + +. Work the other way around - build up a rfc2045 compliant message file from + simple structure information and filenames/variables. + +vim:tw=78 diff --git a/ext/mailparse/config.m4 b/ext/mailparse/config.m4 new file mode 100644 index 0000000000..4e2c4f7d60 --- /dev/null +++ b/ext/mailparse/config.m4 @@ -0,0 +1,13 @@ +dnl $Id$ +dnl config.m4 for extension mailparse + +PHP_ARG_ENABLE(mailparse, whether to enable mailparse support, +[ --enable-mailparse Enable mailparse support]) + +if test "$PHP_MAILPARSE" != "no"; then + if test "$ext_shared" != "yes" && test "$enable_mbstring" != "yes"; then + AC_MSG_WARN(Activating mbstring) + enable_mbstring=yes + fi + PHP_EXTENSION(mailparse, $ext_shared) +fi diff --git a/ext/mailparse/libs.mk b/ext/mailparse/libs.mk new file mode 100644 index 0000000000..7c6e176717 --- /dev/null +++ b/ext/mailparse/libs.mk @@ -0,0 +1,7 @@ +include $(top_builddir)/config_vars.mk +LTLIBRARY_OBJECTS = $(LTLIBRARY_SOURCES:.c=.lo) $(LTLIBRARY_OBJECTS_X) +LTLIBRARY_SHARED_OBJECTS = $(LTLIBRARY_OBJECTS:.lo=.slo) +$(LTLIBRARY_NAME): $(LTLIBRARY_OBJECTS) $(LTLIBRARY_DEPENDENCIES) + $(LINK) $(LTLIBRARY_LDFLAGS) $(LTLIBRARY_OBJECTS) $(LTLIBRARY_LIBADD) + +targets = $(LTLIBRARY_NAME) diff --git a/ext/mailparse/mailparse.c 
b/ext/mailparse/mailparse.c new file mode 100755 index 0000000000..f8d2514e38 --- /dev/null +++ b/ext/mailparse/mailparse.c @@ -0,0 +1,799 @@ +/* + +----------------------------------------------------------------------+ + | PHP version 4.0 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997, 1998, 1999, 2000, 2001 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 2.02 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available at through the world-wide-web at | + | http://www.php.net/license/2_02.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: | + | Wez Furlong (wez@thebrainroom.com) | + | Credit also given to Double Precision Inc. who wrote the code that | + | the support routines for this extension were based upon. | + +----------------------------------------------------------------------+ + */ +/* $Id$ */ + +#include "php.h" +#include "php_ini.h" +#include "ext/standard/file.h" +#include "php_mailparse.h" +#include "mailparse_rfc822.h" +#include "ext/standard/info.h" +#include "ext/standard/php_output.h" + +/* just in case the config check doesn't enable mbstring automatically */ +#if !HAVE_MBSTRING +#error The mailparse extension requires the mbstring extension! 
+#endif + +#include "ext/mbstring/mbfilter.h" + +static int le_rfc2045; +/* this is for sections we "found": we mustn't free them, as this will cause + * a SEGFAULT when the parent is freed */ +static int le_rfc2045_nofree; + + +function_entry mailparse_functions[] = { + PHP_FE(mailparse_msg_parse_file, NULL) + PHP_FE(mailparse_msg_get_part, NULL) + PHP_FE(mailparse_msg_get_structure, NULL) + PHP_FE(mailparse_msg_get_part_data, NULL) + PHP_FE(mailparse_msg_extract_part, NULL) + PHP_FE(mailparse_msg_extract_part_file, NULL) + + PHP_FE(mailparse_msg_create, NULL) + PHP_FE(mailparse_msg_free, NULL) + PHP_FE(mailparse_msg_parse, NULL) + PHP_FE(mailparse_rfc822_parse_addresses, NULL) + PHP_FE(mailparse_determine_best_xfer_encoding, NULL) + PHP_FE(mailparse_stream_encode, NULL) + + {NULL, NULL, NULL} +}; + +zend_module_entry mailparse_module_entry = { + "mailparse", + mailparse_functions, + PHP_MINIT(mailparse), + PHP_MSHUTDOWN(mailparse), + PHP_RINIT(mailparse), + PHP_RSHUTDOWN(mailparse), + PHP_MINFO(mailparse), + STANDARD_MODULE_PROPERTIES +}; + +ZEND_DECLARE_MODULE_GLOBALS(mailparse) + +#ifdef COMPILE_DL_MAILPARSE +ZEND_GET_MODULE(mailparse) +#endif + + +ZEND_RSRC_DTOR_FUNC(rfc2045_dtor) +{ + rfc2045_free(rsrc->ptr); +} + +PHP_INI_BEGIN() + STD_PHP_INI_ENTRY("mailparse.def_charset", RFC2045CHARSET, PHP_INI_ALL, OnUpdateString, def_charset, zend_mailparse_globals, mailparse_globals) +PHP_INI_END() + +#define mailparse_msg_name "mailparse_mail_structure" + +#define mailparse_fetch_rfc2045_resource(rfcvar, zvalarg) ZEND_FETCH_RESOURCE2(rfcvar, struct rfc2045 *, zvalarg, -1, mailparse_msg_name, le_rfc2045, le_rfc2045_nofree) + +PHP_MINIT_FUNCTION(mailparse) +{ + le_rfc2045 = zend_register_list_destructors_ex(rfc2045_dtor, NULL, mailparse_msg_name, module_number); + le_rfc2045_nofree = zend_register_list_destructors_ex(NULL, NULL, mailparse_msg_name, module_number); + + REGISTER_INI_ENTRIES(); + + return SUCCESS; +} + +PHP_MSHUTDOWN_FUNCTION(mailparse) +{ + 
UNREGISTER_INI_ENTRIES(); + return SUCCESS; +} + +PHP_MINFO_FUNCTION(mailparse) +{ + php_info_print_table_start(); + php_info_print_table_header(2, "mailparse support", "enabled"); + php_info_print_table_end(); + + DISPLAY_INI_ENTRIES(); +} + + +PHP_RINIT_FUNCTION(mailparse) +{ + return SUCCESS; +} + + +PHP_RSHUTDOWN_FUNCTION(mailparse) +{ + return SUCCESS; +} + +static void mailparse_rfc822t_errfunc(const char * msg, int num) +{ + php_error(E_WARNING, "%s(): %s %d", get_active_function_name(), msg, num); +} + +/* {{{ proto array mailparse_rfc822_parse_addresses(string addresses) + parse addresses and return a hash containing that data +*/ +PHP_FUNCTION(mailparse_rfc822_parse_addresses) +{ + zval ** addresses; + struct rfc822t * tokens; + struct rfc822a * addrs; + int i; + + if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &addresses) == FAILURE) { + WRONG_PARAM_COUNT; + } + convert_to_string_ex(addresses); + + tokens = mailparse_rfc822t_alloc(Z_STRVAL_PP(addresses), mailparse_rfc822t_errfunc); + + if (tokens) { + addrs = mailparse_rfc822a_alloc(tokens); + if (addrs) { + + array_init(return_value); + + for (i = 0; i < addrs->naddrs; i++) { + char * p; + zval * item; + + MAKE_STD_ZVAL(item); + + if (array_init(item) == FAILURE) + break; + + p = mailparse_rfc822_getname(addrs, i); + add_assoc_string(item, "display", p, 0); /* don't duplicate - getname allocated the memory for us */ + p = mailparse_rfc822_getaddr(addrs, i); + add_assoc_string(item, "address", p, 0); /* don't duplicate - getaddr allocated the memory for us */ + + /* add this to the result */ + zend_hash_next_index_insert(HASH_OF(return_value), &item, sizeof(item), NULL); + } + + mailparse_rfc822a_free(addrs); + } + + mailparse_rfc822t_free(tokens); + } +} +/* }}} */ + +/* {{{ proto long mailparse_determine_best_xfer_encoding(resource fp) + figure out the best way of encoding the content read from the file pointer fp, which must be seek-able. 
+*/
+/* Precedence of the result, as implemented below:
+ *   - a NUL byte anywhere            => base64 (scanning stops at once)
+ *   - else a line longer than 200    => quoted-printable
+ *   - else any byte >= 0x80          => 8bit
+ *   - else                           => 7bit
+ * The handle is rewound before and after the scan.  The preferred MIME name
+ * of the chosen encoding is returned as a string, or FALSE if mbstring has
+ * no name for it. */
+PHP_FUNCTION(mailparse_determine_best_xfer_encoding)
+{
+	zval ** file;
+	FILE * fp;
+	int longline = 0;
+	int linelen = 0;
+	int c;
+	enum mbfl_no_encoding bestenc = mbfl_no_encoding_7bit;
+	void * what;
+	int type;
+	char * name;
+
+	if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &file) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	what = zend_fetch_resource(file, -1, "File-Handle", &type, 2, php_file_le_fopen(), php_file_le_stream());
+	ZEND_VERIFY_RESOURCE(what);
+
+#if HAVE_PHP_STREAM
+	if (type == php_file_le_stream()) {
+		php_stream * stream = (php_stream*)what;
+
+		php_stream_rewind(stream);
+		while(!php_stream_eof(stream)) {
+			c = php_stream_getc(stream);
+			if (c == EOF)
+				break;
+			/* 0x80 itself is already outside the 7bit range */
+			if (c >= 0x80)
+				bestenc = mbfl_no_encoding_8bit;
+			else if (c == 0) {
+				/* binary data: base64 wins outright, ignore line lengths */
+				bestenc = mbfl_no_encoding_base64;
+				longline = 0;
+				break;
+			}
+			if (c == '\n')
+				linelen = 0;
+			else if (++linelen > 200)
+				longline = 1;
+		}
+		if (longline)
+			bestenc = mbfl_no_encoding_qprint;
+		php_stream_rewind(stream);
+	}
+	else {
+#endif
+		fp = (FILE*)what;
+
+		rewind(fp);
+		while(!feof(fp)) {
+			c = fgetc(fp);
+			if (c == EOF)
+				break;
+			/* 0x80 itself is already outside the 7bit range */
+			if (c >= 0x80)
+				bestenc = mbfl_no_encoding_8bit;
+			else if (c == 0) {
+				/* binary data: base64 wins outright, ignore line lengths */
+				bestenc = mbfl_no_encoding_base64;
+				longline = 0;
+				break;
+			}
+			if (c == '\n')
+				linelen = 0;
+			else if (++linelen > 200)
+				longline = 1;
+		}
+		if (longline)
+			bestenc = mbfl_no_encoding_qprint;
+		rewind(fp);
+#if HAVE_PHP_STREAM
+	}
+#endif
+
+	name = (char *)mbfl_no2preferred_mime_name(bestenc);
+	if (name)
+	{
+		RETVAL_STRING(name, 1);
+	}
+	else
+	{
+		RETVAL_FALSE;
+	}
+}
+/* }}} */
+
+/* {{{ proto boolean mailparse_stream_encode(resource sourcefp, resource destfp, string encoding)
+   stream data from source file pointer, apply encoding and write to destfp
+*/
+
+/* mbfl output callback: write one converted byte to the FILE* passed as fp */
+static int mailparse_fp_output(int c, void * fp)
+{
+	return fputc(c, (FILE*)fp);
+}
+/* mbfl flush callback: flush the FILE* passed as fp */
+static int mailparse_fp_flush(void * fp)
+{
+	return fflush((FILE*)fp);
+}
+
+PHP_FUNCTION(mailparse_stream_encode)
+{
+	zval ** srcfile,
** destfile, ** encod; + FILE * srcfp, * destfp; + char * buf; + size_t len; + size_t bufsize = 2048; + enum mbfl_no_encoding enc; + mbfl_convert_filter * conv = NULL; + + if (ZEND_NUM_ARGS() != 3 || zend_get_parameters_ex(3, &srcfile, &destfile, &encod) == FAILURE) { + WRONG_PARAM_COUNT; + } + + if ((*srcfile)->type == IS_RESOURCE && (*srcfile)->value.lval == 0) { + RETURN_FALSE; + } + ZEND_FETCH_RESOURCE(srcfp, FILE *, srcfile, -1, "File-Handle", php_file_le_fopen()); + + if ((*destfile)->type == IS_RESOURCE && (*destfile)->value.lval == 0) { + RETURN_FALSE; + } + ZEND_FETCH_RESOURCE(destfp, FILE *, destfile, -1, "File-Handle", php_file_le_fopen()); + + convert_to_string_ex(encod); + enc = mbfl_name2no_encoding(Z_STRVAL_PP(encod)); + if (enc == mbfl_no_encoding_invalid) { + zend_error(E_WARNING, "%s(): unknown encoding \"%s\"", + get_active_function_name(), + Z_STRVAL_PP(encod) + ); + RETURN_FALSE; + } + + convert_to_long_ex(encod); + enc = Z_LVAL_PP(encod); + + buf = emalloc(bufsize); + RETVAL_TRUE; + + conv = mbfl_convert_filter_new(mbfl_no_encoding_8bit, + enc, + mailparse_fp_output, + mailparse_fp_flush, + destfp + ); + while(!feof(srcfp)) { + len = fread(buf, sizeof(char), bufsize, srcfp); + if (len > 0) + { + int i; + for (i=0; itype == IS_RESOURCE && (*arg)->value.lval == 0) { + RETURN_FALSE; + } + + mailparse_fetch_rfc2045_resource(rfcbuf, arg); + + convert_to_string_ex(data); + + rfc2045_parse(rfcbuf, (*data)->value.str.val, (*data)->value.str.len); +} +/* }}} */ + +/* {{{ proto resource mailparse_msg_parse_file(string filename) + Parse file and return a resource representing the structure +*/ +PHP_FUNCTION(mailparse_msg_parse_file) +{ + zval ** filename; + struct rfc2045 * rfcbuf; + char * filebuf; + FILE * fp; + + if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &filename) == FAILURE) { + WRONG_PARAM_COUNT; + } + + convert_to_string_ex(filename); + + /* open file and read it in */ + fp = VCWD_FOPEN(Z_STRVAL_PP(filename), "r"); + if (fp == NULL) { 
+ zend_error(E_WARNING, "%s(): unable to open file %s", get_active_function_name(), Z_STRVAL_PP(filename)); + RETURN_FALSE; + } + + filebuf = emalloc(MAILPARSE_BUFSIZ); + + rfcbuf = rfc2045_alloc_ac(); + if (rfcbuf) { + ZEND_REGISTER_RESOURCE(return_value, rfcbuf, le_rfc2045); + + while(!feof(fp)) { + int got = fread(filebuf, sizeof(char), MAILPARSE_BUFSIZ, fp); + if (got > 0) { + rfc2045_parse(rfcbuf, filebuf, got); + } + } + fclose(fp); + } + efree(filebuf); +} +/* }}} */ + +/* {{{ proto void mailparse_msg_free(resource rfc2045buf) + Frees a handle allocated by mailparse_msg_create +*/ +PHP_FUNCTION(mailparse_msg_free) +{ + zval **arg; + struct rfc2045 * rfcbuf; + + if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg) == FAILURE) { + WRONG_PARAM_COUNT; + } + + if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) { + RETURN_FALSE; + } + + ZEND_FETCH_RESOURCE(rfcbuf, struct rfc2045 *, arg, -1, mailparse_msg_name, le_rfc2045); + + zend_list_delete((*arg)->value.lval); + RETURN_TRUE; +} +/* }}} */ + + +/* {{{ proto long mailparse_msg_create() + Return a handle that can be used to parse a message +*/ +PHP_FUNCTION(mailparse_msg_create) +{ + struct rfc2045 * rfcbuf; + + rfcbuf = rfc2045_alloc_ac(); + + ZEND_REGISTER_RESOURCE(return_value, rfcbuf, le_rfc2045); +} +/* }}} */ + +static void get_structure_callback(struct rfc2045 *p, struct rfc2045id * id, void * ptr) +{ + zval * return_value = (zval *)ptr; + char intbuf[16]; + char buf[256]; + int len, i = 0; + while(id && i < sizeof(buf)) { + sprintf(intbuf, "%d", id->idnum); + len = strlen(intbuf); + if (len > (sizeof(buf)-i)) { + /* too many sections: bail */ + zend_error(E_WARNING, "%s(): too many nested sections in message", get_active_function_name()); + return; + } + sprintf(&buf[i], "%s%c", intbuf, id->next ? '.' : '\0'); + i += len + (id->next ? 
1 : 0); + id = id->next; + } + add_next_index_string(return_value, buf,1); +} + +/* {{{ proto array mailparse_msg_get_structure(resource rfc2045) + Returns an array of mime section names in the supplied message +*/ +PHP_FUNCTION(mailparse_msg_get_structure) +{ + zval **arg; + struct rfc2045 * rfcbuf; + + if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg) == FAILURE) { + WRONG_PARAM_COUNT; + } + + if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) { + RETURN_FALSE; + } + + mailparse_fetch_rfc2045_resource(rfcbuf, arg); + + if (array_init(return_value) == FAILURE) { + RETURN_FALSE; + } + + rfc2045_decode(rfcbuf, &get_structure_callback, return_value); +} +/* }}} */ + +/* callback for decoding using a "userdefined" php function */ +static int extract_callback_user_func(const char *p, size_t n, zval *userfunc) +{ + zval * retval; + zval * arg; + ELS_FETCH(); + + MAKE_STD_ZVAL(retval); + retval->type = IS_BOOL; + retval->value.lval = 0; + + MAKE_STD_ZVAL(arg); + ZVAL_STRINGL(arg, (char*)p, (int)n, 1); + + /* TODO: use zend_is_callable */ + + if (call_user_function(EG(function_table), NULL, userfunc, retval, 1, &arg) == FAILURE) + zend_error(E_WARNING, "%s(): unable to call user function", get_active_function_name()); + + zval_dtor(retval); + zval_dtor(arg); + efree(retval); + efree(arg); + + return 0; +} + +/* callback for decoding to the current output buffer */ +static int extract_callback_stdout(const char *p, size_t n, void *ptr) +{ + ZEND_WRITE(p, n); + return 0; +} + +/* {{{ proto void mailparse_msg_extract_part(resource rfc2045, string msgbody[, string callbackfunc]) + Extracts/decodes a message section. If callbackfunc is not specified, the contents will be sent to "stdout". 
+*/ +PHP_FUNCTION(mailparse_msg_extract_part) +{ + zval **arg, **bodystr, **cbfunc; + struct rfc2045 * rfcbuf; + off_t start, end, body; + off_t nlines; + off_t nbodylines; + + switch(ZEND_NUM_ARGS()) { + case 3: + if (zend_get_parameters_ex(3, &arg, &bodystr, &cbfunc) == FAILURE) { + WRONG_PARAM_COUNT; + } + if (Z_TYPE_PP(cbfunc) != IS_ARRAY) + convert_to_string_ex(cbfunc); + break; + case 2: + if (zend_get_parameters_ex(2, &arg, &bodystr) == FAILURE) { + WRONG_PARAM_COUNT; + } + cbfunc = NULL; + break; + } + convert_to_string_ex(bodystr); + + if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) { + RETURN_FALSE; + } + mailparse_fetch_rfc2045_resource(rfcbuf, arg); + + + rfc2045_mimepos(rfcbuf, &start, &end, &body, &nlines, &nbodylines); + + if (cbfunc) + rfc2045_cdecode_start(rfcbuf, (rfc2045_decode_user_func_t)&extract_callback_user_func, *cbfunc); + else + rfc2045_cdecode_start(rfcbuf, &extract_callback_stdout, NULL); + + if (Z_STRLEN_PP(bodystr) < end) + end = Z_STRLEN_PP(bodystr); + else + end = end-body; + + rfc2045_cdecode(rfcbuf, Z_STRVAL_PP(bodystr) + body, end); + rfc2045_cdecode_end(rfcbuf); + + RETURN_TRUE; + +} +/* }}} */ + +/* {{{ proto string mailparse_msg_extract_part_file(resource rfc2045, string filename[, string callbackfunc]) + Extracts/decodes a message section, decoding the transfer encoding +*/ +PHP_FUNCTION(mailparse_msg_extract_part_file) +{ + zval **arg, **filename, **cbfunc; + struct rfc2045 * rfcbuf; + char * filebuf = NULL; + FILE * fp = NULL; + off_t start, end, body; + off_t nlines; + off_t nbodylines; + + switch(ZEND_NUM_ARGS()) { + case 3: + if (zend_get_parameters_ex(3, &arg, &filename, &cbfunc) == FAILURE) { + WRONG_PARAM_COUNT; + } + if (Z_TYPE_PP(cbfunc) != IS_ARRAY) + convert_to_string_ex(cbfunc); + break; + case 2: + if (zend_get_parameters_ex(2, &arg, &filename) == FAILURE) { + WRONG_PARAM_COUNT; + } + cbfunc = NULL; + break; + } + convert_to_string_ex(filename); + + if ((*arg)->type == IS_RESOURCE && 
(*arg)->value.lval == 0) { + RETURN_FALSE; + } + mailparse_fetch_rfc2045_resource(rfcbuf, arg); + + /* figure out where the message part starts/ends */ + rfc2045_mimepos(rfcbuf, &start, &end, &body, &nlines, &nbodylines); + + if (cbfunc) + rfc2045_cdecode_start(rfcbuf, (rfc2045_decode_user_func_t)&extract_callback_user_func, *cbfunc); + else + rfc2045_cdecode_start(rfcbuf, &extract_callback_stdout, NULL); + + /* open file and read it in */ + fp = VCWD_FOPEN(Z_STRVAL_PP(filename), "rb"); + if (fp == NULL) { + zend_error(E_WARNING, "%s(): unable to open file %s", get_active_function_name(), Z_STRVAL_PP(filename)); + RETURN_FALSE; + } + if (fseek(fp, body, SEEK_SET) == -1) + { + zend_error(E_WARNING, "%s(): unable to seek to section start", get_active_function_name()); + RETVAL_FALSE; + goto cleanup; + } + filebuf = emalloc(MAILPARSE_BUFSIZ); + + + while (body < end) + { + size_t n = MAILPARSE_BUFSIZ; + + if ((off_t)n > end-body) + n=end-body; + n = fread(filebuf, sizeof(char), n, fp); + if (n == 0) + { + zend_error(E_WARNING, "%s(): error reading from file \"%s\", offset %d", get_active_function_name(), Z_STRVAL_PP(filename), body); + RETVAL_FALSE; + goto cleanup; + } + rfc2045_cdecode(rfcbuf, filebuf, n); + body += n; + } + RETVAL_TRUE; + +cleanup: + rfc2045_cdecode_end(rfcbuf); + if (fp) + fclose(fp); + if (filebuf) + efree(filebuf); +} +/* }}} */ + +/* {{{ proto array mailparse_msg_get_part_data(resource rfc2045) + Return an assoc. array of info about the message +*/ +/* NOTE: you may add keys to the array, but PLEASE do not remove the key/value pairs + that are emitted here - it will break my PHP scripts if you do! 
*/
+/* Keys emitted: "content-type", one "content-<attr>" per content-type
+ * attribute, one "disposition-<attr>" per content-disposition attribute,
+ * "headers", "transfer-encoding", "charset", "content-disposition",
+ * "content-id", "content-description", "content-language", "content-md5",
+ * "content-base", "starting-pos", "starting-pos-body", "ending-pos",
+ * "line-count" and "body-line-count".  Several of these are only added when
+ * the corresponding value is non-empty. */
+PHP_FUNCTION(mailparse_msg_get_part_data)
+{
+	zval ** arg;
+	struct rfc2045 * rfcbuf;
+	char * content_type, *transfer_encoding, *charset;
+	off_t start, end, body, nlines, nbodylines;
+	char * disposition, * disposition_name, *disposition_filename;
+	char *p;
+	struct rfc2045attr * attr;
+	zval * headers;
+
+	if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) {
+		RETURN_FALSE;
+	}
+
+
+	mailparse_fetch_rfc2045_resource(rfcbuf, arg);
+
+	if (array_init(return_value) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+
+	rfc2045_mimeinfo(rfcbuf, (const char**)&content_type, (const char**)&transfer_encoding, (const char**)&charset);
+	rfc2045_mimepos(rfcbuf, &start, &end, &body, &nlines, &nbodylines);
+
+	if (content_type && *content_type)
+		add_assoc_string(return_value, "content-type", content_type, 1);
+
+	/* get attributes for content-type */
+	attr = rfcbuf->content_type_attr;
+	while (attr != NULL) {
+		if (attr->name && attr->value) {
+			/* attribute names come from the message, so size the key to
+			 * fit instead of copying into a fixed 80-byte buffer;
+			 * sizeof() of the literal already counts the NUL */
+			char * key = emalloc(sizeof("content-") + strlen(attr->name));
+			strcpy(key, "content-");
+			strcat(key, attr->name);
+			add_assoc_string(return_value, key, attr->value, 1);
+			efree(key);
+		}
+		attr = attr->next;
+	}
+	/* get attributes for content-disposition */
+	attr = rfcbuf->content_disposition_attr;
+	while (attr != NULL) {
+		if (attr->name && attr->value) {
+			char * key = emalloc(sizeof("disposition-") + strlen(attr->name));
+			strcpy(key, "disposition-");
+			strcat(key, attr->name);
+			add_assoc_string(return_value, key, attr->value, 1);
+			efree(key);
+		}
+		attr = attr->next;
+	}
+	/* get headers for this section: hand out a deep copy of the header hash */
+	MAKE_STD_ZVAL(headers);
+	*headers = *rfcbuf->headerhash;
+	INIT_PZVAL(headers);
+	zval_copy_ctor(headers);
+	/* add to result */
+	zend_hash_update(HASH_OF(return_value), "headers", sizeof("headers"), &headers, sizeof(headers), NULL);
+
+	/* NOTE(review): rfc2045_mimeinfo() is not visible here; if it can leave
+	 * these NULL, emit an empty string so the keys are still present */
+	add_assoc_string(return_value, "transfer-encoding", transfer_encoding ? transfer_encoding : "", 1);
+	add_assoc_string(return_value, "charset", charset ? charset : "", 1);
+
+	rfc2045_dispositioninfo(rfcbuf, (const char**)&disposition, (const char**)&disposition_name, (const char**)&disposition_filename);
+	if (disposition && *disposition)
+		add_assoc_string(return_value, "content-disposition", disposition, 1);
+
+	if (*(p=(char*)rfc2045_content_id(rfcbuf)))
+		add_assoc_string(return_value, "content-id", p, 1);
+	if (*(p=(char*)rfc2045_content_description(rfcbuf)))
+		add_assoc_string(return_value, "content-description", p, 1);
+	if (*(p=(char*)rfc2045_content_language(rfcbuf)))
+		add_assoc_string(return_value, "content-language", p, 1);
+	if (*(p=(char*)rfc2045_content_md5(rfcbuf)))
+		add_assoc_string(return_value, "content-md5", p, 1);
+	/* NOTE(review): p is only freed when non-empty; whether an empty result
+	 * is also heap-allocated cannot be confirmed from this file */
+	if (*(p=(char*)rfc2045_content_base(rfcbuf))) {
+		add_assoc_string(return_value, "content-base", p, 1);
+		/* content base allocates mem */
+		efree(p);
+	}
+
+
+	add_assoc_long(return_value, "starting-pos", start);
+	add_assoc_long(return_value, "starting-pos-body", body);
+	add_assoc_long(return_value, "ending-pos", end);
+	add_assoc_long(return_value, "line-count", nlines);
+	add_assoc_long(return_value, "body-line-count", nbodylines);
+}
+/* }}} */
+
+
+/* {{{ proto resource mailparse_msg_get_part(resource rfc2045, string mimesection)
+   Return a handle on a given section in a mimemessage
+*/
+/* The returned handle is registered with the "nofree" resource type, which
+ * has no destructor: it borrows a pointer found inside the parent
+ * structure.  Returns FALSE (with a warning) if the section is not found. */
+PHP_FUNCTION(mailparse_msg_get_part)
+{
+	zval ** arg, ** mimesection;
+	struct rfc2045 * rfcbuf, * newsection;
+
+	if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &arg, &mimesection) == FAILURE) {
+		WRONG_PARAM_COUNT;
+	}
+
+	if ((*arg)->type == IS_RESOURCE && (*arg)->value.lval == 0) {
+		RETURN_FALSE;
+	}
+
+	mailparse_fetch_rfc2045_resource(rfcbuf, arg);
+
+	convert_to_string_ex(mimesection);
+
+	newsection = rfc2045_find(rfcbuf, (*mimesection)->value.str.val);
+
+	if (!newsection) {
+		php_error(E_WARNING, "%s(): cannot find section %s in message", get_active_function_name(), (*mimesection)->value.str.val);
+		RETURN_FALSE;
+	}
+	ZEND_REGISTER_RESOURCE(return_value, newsection, le_rfc2045_nofree);
+}
+/* }}} */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim: tw=78 ts=4 sw=4
+ */
diff --git
a/ext/mailparse/mailparse_rfc822.h b/ext/mailparse/mailparse_rfc822.h new file mode 100644 index 0000000000..74a4eb28bf --- /dev/null +++ b/ext/mailparse/mailparse_rfc822.h @@ -0,0 +1,161 @@ +/* $Id$ */ +#ifndef mailparse_rfc822_h +#define mailparse_rfc822_h + +/* +** Copyright 1998 - 2000 Double Precision, Inc. +** See COPYING for distribution information. +*/ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** The text string we want to parse is first tokenized into an array of +** struct rfc822token records. 'ptr' points into the original text +** string, and 'len' has how many characters from 'ptr' belongs to this +** token. +*/ + +struct rfc822token { + struct rfc822token *next; /* Unused by librfc822, for use by + ** clients */ + int token; +/* + Values for token: + + '(' - comment + '"' - quoted string + '<', '>', '@', ',', ';', ':', '.', '[', ']', '%', '!', '=', '?', '/' - RFC atoms. + 0 - atom +*/ + +#define mailparse_rfc822_is_atom(p) ( (p) == 0 || (p) == '"' || (p) == '(' ) + + const char *ptr; /* Pointer to value for the token. */ + int len; /* Length of token value */ +} ; + +/* +** After the struct rfc822token array is built, it is used to create +** the rfc822addr array, which is the array of addresses (plus +** syntactical fluff) extracted from those text strings. Each rfc822addr +** record has several possible interpretation: +** +** tokens is NULL - syntactical fluff, look in name/nname for tokens +** representing the syntactical fluff ( which is semicolons +** and list name: +** +** tokens is not NULL - actual address. The tokens representing the actual +** address is in tokens/ntokens. If there are comments in +** the address that are possible "real name" for the address +** they are saved in name/nname (name may be null if there +** is none). +** If nname is 1, and name points to a comment token, +** the address was specified in old-style format. Otherwise +** the address was specified in new-style route-addr format. 
+** +** The tokens and name pointers are set to point to the original rfc822token +** array. +*/ + +struct rfc822addr { + struct rfc822token *tokens; + struct rfc822token *name; +} ; + +/*************************************************************************** +** +** rfc822 tokens +** +***************************************************************************/ + +struct rfc822t { + struct rfc822token *tokens; + int ntokens; +} ; + +struct rfc822t * mailparse_rfc822t_alloc(const char *p, + void (*err_func)(const char *, int)); /* Parse addresses */ +void mailparse_rfc822t_free(struct rfc822t *); /* Free rfc822 structure */ + +void mailparse_rfc822tok_print(const struct rfc822token *, void (*)(char, void *), void *); + /* Print the tokens */ + +/*************************************************************************** +** +** rfc822 addresses +** +***************************************************************************/ + +struct rfc822a { + struct rfc822addr *addrs; + int naddrs; +} ; + +struct rfc822a * mailparse_rfc822a_alloc(struct rfc822t *); +void mailparse_rfc822a_free(struct rfc822a *); /* Free rfc822 structure */ + +void mailparse_rfc822_deladdr(struct rfc822a *, int); + +/* rfc822_print "unparses" the rfc822 structure. Each rfc822addr is "printed" + (via the attached function). NOTE: instead of separating addresses by + commas, the print_separator function is called. 
+*/ + +void mailparse_rfc822_print(const struct rfc822a *a, + void (*print_func)(char, void *), + void (*print_separator)(const char *, void *), void *); + +/* rfc822_print_common is an internal function */ + +void mailparse_rfc822_print_common(const struct rfc822a *a, + char *(*decode_func)(const char *, const char *), + const char *chset, + void (*print_func)(char, void *), + void (*print_separator)(const char *, void *), void *); + +/* Another unparser, except that only the raw addresses are extracted, + and each address is followed by a newline character */ + +void mailparse_rfc822_addrlist(const struct rfc822a *, void (*print_func)(char, void *), + void *); + +/* Now, just the comments. If comments not given, the address. */ +void mailparse_rfc822_namelist(const struct rfc822a *, void (*print_func)(char, void *), + void *); + +/* Unparse an individual name/addr from a list of addresses. If the given + index points to some syntactical fluff, this is a noop */ + +void mailparse_rfc822_prname(const struct rfc822a *, int, void (*)(char, void *), void *); +void mailparse_rfc822_praddr(const struct rfc822a *, int, void (*)(char, void *), void *); + +/* Like rfc822_prname, except that we'll also print the legacy format +** of a list designation. 
+*/ + +void mailparse_rfc822_prname_orlist(const struct rfc822a *, int, + void (*)(char, void *), void *); + +/* Extra functions */ + +char *mailparse_rfc822_gettok(const struct rfc822token *); +char *mailparse_rfc822_getaddr(const struct rfc822a *, int); +char *mailparse_rfc822_getname(const struct rfc822a *, int); +char *mailparse_rfc822_getname_orlist(const struct rfc822a *, int); +char *mailparse_rfc822_getaddrs(const struct rfc822a *); +char *mailparse_rfc822_getaddrs_wrap(const struct rfc822a *, int); + + +char *mailparse_rfc822_coresubj(const char *, int *); +char *mailparse_rfc822_coresubj_nouc(const char *, int *); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/ext/mailparse/php_mailparse.h b/ext/mailparse/php_mailparse.h new file mode 100755 index 0000000000..3cc0449dbf --- /dev/null +++ b/ext/mailparse/php_mailparse.h @@ -0,0 +1,90 @@ +/* + +----------------------------------------------------------------------+ + | PHP version 4.0 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 2.02 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available at through the world-wide-web at | + | http://www.php.net/license/2_02.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: | + | Wez Furlong | + | Credit also given to Double Precision Inc. who wrote the code that | + | the support routines for this extension were based upon. 
| + +----------------------------------------------------------------------+ + */ +/* $Id$ */ + +#ifndef PHP_MAILPARSE_H +#define PHP_MAILPARSE_H + +extern zend_module_entry mailparse_module_entry; +#define phpext_mailparse_ptr &mailparse_module_entry + +#ifdef PHP_WIN32 +#define PHP_MAILPARSE_API __declspec(dllexport) +#else +#define PHP_MAILPARSE_API +#endif + +PHP_MINIT_FUNCTION(mailparse); +PHP_MSHUTDOWN_FUNCTION(mailparse); +PHP_RINIT_FUNCTION(mailparse); +PHP_RSHUTDOWN_FUNCTION(mailparse); +PHP_MINFO_FUNCTION(mailparse); + +PHP_FUNCTION(mailparse_msg_parse_file); +PHP_FUNCTION(mailparse_msg_get_part); +PHP_FUNCTION(mailparse_msg_get_structure); +PHP_FUNCTION(mailparse_msg_get_part_data); +PHP_FUNCTION(mailparse_msg_extract_part); +PHP_FUNCTION(mailparse_msg_extract_part_file); + +PHP_FUNCTION(mailparse_msg_create); +PHP_FUNCTION(mailparse_msg_free); +PHP_FUNCTION(mailparse_msg_parse); +/* NOTE(review): mailparse_msg_parse_file is already declared at the top of this list; the repeat below is redundant. */ PHP_FUNCTION(mailparse_msg_parse_file); + +PHP_FUNCTION(mailparse_msg_find); +PHP_FUNCTION(mailparse_msg_getstructure); +PHP_FUNCTION(mailparse_msg_getinfo); +PHP_FUNCTION(mailparse_msg_extract); +PHP_FUNCTION(mailparse_msg_extract_file); +PHP_FUNCTION(mailparse_rfc822_parse_addresses); +PHP_FUNCTION(mailparse_determine_best_xfer_encoding); +PHP_FUNCTION(mailparse_stream_encode); + +#include "rfc2045.h" +#include "mailparse_rfc822.h" + +#define MAILPARSE_BUFSIZ 4096 +ZEND_BEGIN_MODULE_GLOBALS(mailparse) + char * def_charset; /* default charset for use in (re)writing mail */ +ZEND_END_MODULE_GLOBALS(mailparse); + +extern ZEND_DECLARE_MODULE_GLOBALS(mailparse); + + +/* Accessors for the module globals: MAILPARSEG(v) reads field v. Under ZTS, MAILPARSELS_FETCH() declares a local mailparse_globals pointer obtained from ts_resource(); otherwise it expands to nothing and MAILPARSEG reads the global struct directly. */ #ifdef ZTS +#define MAILPARSEG(v) (mailparse_globals->v) +#define MAILPARSELS_FETCH() zend_mailparse_globals *mailparse_globals = ts_resource(mailparse_globals_id) +#else +#define MAILPARSEG(v) (mailparse_globals.v) +#define MAILPARSELS_FETCH() +#endif + +#endif + + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim: sw=4 ts=4 tw=78 + */ diff --git a/ext/mailparse/rfc2045.c 
b/ext/mailparse/rfc2045.c new file mode 100755 index 0000000000..a513c1f7e6 --- /dev/null +++ b/ext/mailparse/rfc2045.c @@ -0,0 +1,1168 @@ +/* $Id$ */ +/* + ** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for + ** distribution information. + */ + +#include "php.h" +#include "php_mailparse.h" + +#define MAXLEVELS 20 +#define MAXPARTS 300 + +/* + New RFC2045 structure. + Returns an emalloc-ed, zero-filled struct with pindex=1, workinheader=1 and + headerhash initialised to a fresh empty array. + */ + +struct rfc2045 *rfc2045_alloc() +{ + struct rfc2045 *p=(struct rfc2045 *)emalloc(sizeof(struct rfc2045)); + + /* Initialize everything to nulls, except for one thing */ + + memset(p, 0, sizeof(*p)); + + p->pindex=1; /* Start with part #1 */ + + /* Most of the time, we're about to read a header */ + p->workinheader=1; + + MAKE_STD_ZVAL(p->headerhash); + array_init(p->headerhash); + + return (p); +} + +/* Walk the attribute list p and return the value of the first entry whose + name equals name (strcmp, so case-sensitive); returns 0 when none matches. + The returned pointer aliases the list entry; it is not a copy. */ const char *rfc2045_getattr(const struct rfc2045attr *p, const char *name) +{ + while (p) + { + if (p->name && strcmp(p->name, name) == 0) + return (p->value); + p=p->next; + } + return (0); +} + +/* Set, replace or delete attribute name in the list at *p. + A null val unlinks and frees the matching entry (no-op if absent). + Otherwise val is copied with estrdup and stored, appending a new entry at the + tail when name is not yet present. + NOTE(review): unlike rfc2045_getattr, the search loop calls strcmp on the + entry name without a null check. */ void rfc2045_setattr(struct rfc2045attr **p, const char *name, const char *val) +{ + char *v; + + while (*p) + { + if (strcmp( (*p)->name, name) == 0) break; + p=&(*p)->next; + } + if (val == 0) + { + struct rfc2045attr *q= *p; + + if (q) + { + *p=q->next; + if (q->name) efree(q->name); + if (q->value) efree(q->value); + efree(q); + } + return; + } + + v = estrdup(val); + + if (!*p) + { + *p = (struct rfc2045attr *)emalloc(sizeof(**p)); + memset( (*p), 0, sizeof(**p)); + (*p)->name = estrdup(name); + } + if ( (*p)->value ) + efree ( (*p)->value ); + (*p)->value=v; +} + +/* static const char cb_name[]="boundary"; */ + +/* #define ContentBoundary(p) (rfc2045_getattr( (p)->content_type_attr, cb_name)) */ + +/* ContentBoundary(p) reads the boundary cached in the boundary field of p; the older attribute-lookup form is kept commented out above. */ #define ContentBoundary(p) ( (p)->boundary ) + +/* + Unallocate the RFC2045 structure. Recursively unallocate + all sub-structures. Unallocate all associated buffers. 
+ */ + +/* Free every node of an attribute list, including the name and value of each node. */ static void rfc2045_freeattr(struct rfc2045attr *p) +{ + while (p) + { + struct rfc2045attr *q=p->next; + + if (p->name) efree(p->name); + if (p->value) efree(p->value); + efree(p); + p=q; + } +} + +/* Free p: recurses into every child on the firstpart list, then releases both attribute lists, every owned string and buffer, the headerhash zval, and finally p itself. */ void rfc2045_free(struct rfc2045 *p) +{ + struct rfc2045 *q, *r; + + for (q=p->firstpart; q; ) + { + r=q->next; + rfc2045_free(q); + q=r; + } + rfc2045_freeattr(p->content_type_attr); + rfc2045_freeattr(p->content_disposition_attr); + + if (p->content_md5) efree(p->content_md5); + if (p->content_base) efree(p->content_base); + if (p->content_location) efree(p->content_location); + if (p->content_language) efree(p->content_language); + if (p->content_id) efree(p->content_id); + if (p->content_description) efree(p->content_description); + if (p->content_transfer_encoding) efree(p->content_transfer_encoding); + if (p->boundary) efree(p->boundary); + if (p->content_type) efree(p->content_type); + if (p->mime_version) efree(p->mime_version); + if (p->workbuf) efree(p->workbuf); + if (p->header) efree(p->header); + if (p->content_disposition) efree(p->content_disposition); + if (p->rw_transfer_encoding) efree(p->rw_transfer_encoding); + if (p->rfc2045acptr) + efree(p->rfc2045acptr); + zval_dtor(p->headerhash); + efree(p->headerhash); + efree(p); +} + +/* + Generic dynamic buffer append. + When len + *buflen exceeds *bufsize the buffer is grown (erealloc, or emalloc + on first use) to len + *buflen + 256 bytes before the data is copied in. + NOTE(review): the local variable p declared in the grow branch shadows the + parameter p; the memcpy after the branch uses the parameter. + */ + +void rfc2045_add_buf( + char **bufptr, /* Buffer */ + size_t *bufsize, /* Buffer's maximum size */ + size_t *buflen, /* Buffer's current size */ + + const char *p, size_t len) /* Append this data */ +{ + if (len + *buflen > *bufsize) + { + size_t newsize=len+*buflen+256; + char *p= *bufptr ? 
(char *)erealloc(*bufptr, newsize): + (char *)emalloc(newsize); + + *bufptr=p; + *bufsize=newsize; + } + + memcpy(*bufptr + *buflen, p, len); + *buflen += len; +} + +/* Append to the work buffer */ + +void rfc2045_add_workbuf(struct rfc2045 *h, const char *p, size_t len) +{ + rfc2045_add_buf( &h->workbuf, &h->workbufsize, &h->workbuflen, p, len); +} + +/* Append one character to the work buffer */ + +void rfc2045_add_workbufch(struct rfc2045 *h, int c) +{ + char cc= (char)c; + + rfc2045_add_workbuf(h, &cc, 1); +} + +/* + Generic function to duplicate contents of a string. + The destination string may already be previously allocated, + so unallocate it. + A null q just clears *p. The copy is made with estrdup, so the caller keeps + ownership of q. + */ + +static void set_string(char **p, + const char *q) +{ + if (*p) { + efree(*p); + *p=0; + } + if (!q) return; + + *p = estrdup(q); +} + +/* Update byte counts for this structure, and all the superstructures: + sets endpos and endbody and adds nlines to the line count (and to the body + line count when the part is not in its header) on p and on every ancestor + reached through the parent pointer. */ + +static void update_counts(struct rfc2045 *p, size_t newcnt, size_t newendcnt, + unsigned nlines) +{ + while (p) + { + p->endpos = newcnt; + p->endbody = newendcnt; + p->nlines += nlines; + if (!p->workinheader) + p->nbodylines += nlines; + p=p->parent; + } +} + +/* + Main entry point for RFC2045 parsing. External data is fed + by repetitively calling rfc2045_parse(). + + rfc2045_parse() breaks up input into lines, and calls doline() + to process each line. + + NOTE(review): the inner for-loop header below, and the shift loop near the + end of this function, are syntactically incomplete in this copy -- text + appears to have been lost. Restore them from the upstream file before + building. + */ + +static void doline(struct rfc2045 *); + +void rfc2045_parse(struct rfc2045 *h, const char *buf, size_t s) +{ + size_t l; + + while (s) + { + for (l=0; lworkbuflen=0; + } + else + rfc2045_add_workbuf(h, buf, l); + buf += l; + s -= l; + } + + /* + ** Our buffer's getting pretty big. Let's see if we can + ** partially handle it. + */ + + if (h->workbuflen > 512) + { + struct rfc2045 *p; + int l, i; + + for (p=h; p->lastpart && !p->lastpart->workclosed; + p=p->lastpart) + ; + + /* If p->workinheader, we've got a mother of all headers + ** here. Well, that's just too bad, we'll end up garbling + ** it. 
+ */ + + l=h->workbuflen; + + /* We do need to make sure that the final \r\n gets + ** stripped off, so don't gobble up everything if + ** the last character we see is a \r + */ + + if (h->workbuf[l-1] == '\r') + --l; + + /* If we'll be rewriting, make sure rwprep knows about + ** stuff that was skipped just now. */ + + if (h->rfc2045acptr && !p->workinheader && + (!p->lastpart || !p->lastpart->workclosed)) + (*h->rfc2045acptr->section_contents)(h->rfc2045acptr, h->workbuf, l); + + update_counts(p, p->endpos+l, p->endpos+l, 0); + p->informdata=1; + for (i=0; lworkbuflen; l++) + h->workbuf[i++]=h->workbuf[l]; + h->workbuflen=i; + } +} + +/* + Append a new RFC2045 subpart. Adds new RFC2045 structure to the + end of the list of existing RFC2045 substructures. + The first child gets pindex 0, later children the pindex of the previous + sibling plus one. All four position fields start at startpos, and numparts + on the root of the tree is incremented. + */ + +static struct rfc2045 *append_part_noinherit(struct rfc2045 *p, size_t startpos) +{ + struct rfc2045 *newp; + + newp=rfc2045_alloc(); + if (p->lastpart) + { + p->lastpart->next=newp; + newp->pindex=p->lastpart->pindex+1; + } + else + { + p->firstpart=newp; + newp->pindex=0; + } + p->lastpart=newp; + newp->parent=p; + + /* Initialize source pointers */ + newp->startpos = newp->endpos = newp->startbody = newp->endbody = startpos; + + while (p->parent) + p=p->parent; + ++p->numparts; + + return (newp); +} + +/* Like append_part_noinherit, then copy the content_transfer_encoding and the charset attribute of the parent into the new part. */ static struct rfc2045 *append_part(struct rfc2045 *p, size_t startpos) +{ + struct rfc2045 *newp=append_part_noinherit(p, startpos); + + /* Substructures inherit content transfer encoding and character set */ + + set_string(&newp->content_transfer_encoding, + p->content_transfer_encoding); + rfc2045_setattr(&newp->content_type_attr, "charset", + rfc2045_getattr(p->content_type_attr, "charset")); + return (newp); +} + +/* + doline() processes next line in the RFC2045 message. + + Drills down the list of all the multipart messages currently open, + and checks if the line is a boundary line for the given multipart. 
+ In theory the boundary line, if there is one, should be the boundary + line only for the inner multipart only, but, this takes into account + broken MIME messages. + */ + +static void do_header(struct rfc2045 *); + +/* NOTE(review): the first for-loop header in doline (the one starting with k=0) is syntactically incomplete in this copy -- text appears to have been lost between the loop variable and lastpart. Restore it from the upstream file before building. */ static void doline(struct rfc2045 *p) +{ + size_t cnt=p->workbuflen; + char *c=p->workbuf; + size_t n=cnt-1; /* Strip \n (we always get at least a \n here) */ + struct rfc2045 *newp; + struct rfc2045ac *rwp=p->rfc2045acptr; + unsigned num_levels=0; + + size_t k; + int bit8=0; + + if (p->numparts > MAXPARTS) + { + p->rfcviolation |= RFC2045_ERR2COMPLEX; + return; + } + + for (k=0; klastpart && + !newp->lastpart->workclosed; newp=newp->lastpart, + ++num_levels) + { + if (ContentBoundary(newp) == 0 || newp->workinheader) + continue; + + if (newp->lastpart->informdata) + { + p=newp->lastpart; + p->informdata=0; + break; + } + } + + /* Drill down until we match a boundary, or until we've reached + the last RFC2045 section that has been opened. + */ + + while (p->lastpart) + { + size_t l; + const char *cb; + + if (p->lastpart->workclosed) + { + update_counts(p, p->endpos+cnt, p->endpos+cnt, 1); + return; + } + /* Leftover trash -- workclosed is set when the final + ** terminating boundary has been seen */ + + /* content_boundary may be set before the entire header + ** has been seen, so continue drilling down in that case + */ + + cb=ContentBoundary(p); + + if (cb == 0 || p->workinheader) + { + p=p->lastpart; + ++num_levels; + continue; + } + + l=strlen(cb); + + if (c[0] == '-' && c[1] == '-' && n >= 2+l && + strncasecmp(cb, c+2, l) == 0) + { + + if (rwp && (!p->lastpart || !p->lastpart->isdummy)) + (*rwp->end_section)(rwp); + + /* Ok, we've found a boundary */ + + if (n >= 4+l && strncmp(c+2+l, "--", 2) == 0) + { + /* Last boundary */ + + p->lastpart->workclosed=1; + update_counts(p, p->endpos+cnt, p->endpos+cnt, + 1); + return; + } + + /* Create new RFC2045 section */ + + newp=append_part(p, p->endpos+cnt); + update_counts(p, p->endpos+cnt, p->endpos+n, 
1); + + /* The new RFC2045 section is MIME compliant: it gets its own estrdup copy of the mime_version of the enclosing part */ + + newp->mime_version = estrdup(p->mime_version); + return; + } + p=p->lastpart; + ++num_levels; + } + + /* Ok, we've found the RFC2045 section that we're working with. + ** Now what? + */ + + if (! p->workinheader) + { + /* Processing body, just update the counts. */ + + size_t cnt_update=cnt; + + if (bit8 && !p->content_8bit && + (p->rfcviolation & RFC2045_ERR8BITCONTENT) == 0) + { + struct rfc2045 *q; + + for (q=p; q; q=q->parent) + q->rfcviolation |= RFC2045_ERR8BITCONTENT; + } + + /* + ** In multiparts, the final newline in a part belongs to the + ** boundary, otherwise, include it in the text. + */ + if (p->parent && p->parent->content_type && + strncasecmp(p->parent->content_type, + "multipart/", 10) == 0) + cnt_update=n; + + if (!p->lastpart || !p->lastpart->workclosed) + { + if (rwp && !p->isdummy) + (*rwp->section_contents)(rwp, c, cnt); + + update_counts(p, p->endpos+cnt, p->endpos+cnt_update, + 1); + } + return; + } + + if (bit8 && (p->rfcviolation & RFC2045_ERR8BITHEADER) == 0) + { + struct rfc2045 *q; + + for (q=p; q; q=q->parent) + q->rfcviolation |= RFC2045_ERR8BITHEADER; + } + + /* In the header */ + + if ( n == 0 ) /* End of header, body begins. Parse header. */ + { + do_header(p); /* Clean up any left over header line */ + p->workinheader=0; + + /* Message body starts right here */ + + p->startbody=p->endpos+cnt; + update_counts(p, p->startbody, p->startbody, 1); + --p->nbodylines; /* Don't count the blank line */ + + /* Discard content type and boundary if I don't understand this MIME flavor. + * Allow broken messages that omit the Mime-Version header to still be + * parsed. + */ + + if (p->mime_version == NULL && p->content_type != NULL) { + /* technically in violation of the spec, but there are some broken + * mailers out there that send this. Sadly, they are so broken + * they don't set X-Mailer so we can't tell what they are... 
+ * Lets be useful and allow it, but flag it as a boo-boo */ + p->mime_version = estrdup("1.0"); + p->rfcviolation |= RFC2045_ERRNOMIMEVERSION; + } + + if (!RFC2045_ISMIME1(p->mime_version)) + { + set_string(&p->content_type, 0); + + rfc2045_freeattr(p->content_type_attr); + p->content_type_attr=0; + set_string(&p->content_disposition, 0); + rfc2045_freeattr(p->content_disposition_attr); + p->content_disposition_attr=0; + if (p->boundary) + { + efree(p->boundary); + p->boundary=0; + } + } + + /* Normally, if we don't have a content_type, default it + ** to text/plain. However, if the multipart type is + ** multipart/digest, it is message/rfc822. + */ + + if (RFC2045_ISMIME1(p->mime_version) && !p->content_type) + { + char *q="text/plain"; + + if (p->parent && p->parent->content_type && + strcmp(p->parent->content_type, + "multipart/digest") == 0) + q="message/rfc822"; + set_string(&p->content_type, q); + } + + /* If this is not a multipart section, we don't want to + ** hear about any boundaries + ** NOTE(review): only the boundary attribute is removed here; + ** ContentBoundary() reads the boundary field of p, which this + ** statement leaves untouched -- confirm that is intended. + */ + + if (!p->content_type || + strncmp(p->content_type, "multipart/", 10)) + rfc2045_setattr(&p->content_type_attr, "boundary", 0); + + /* If this section's a message, we will expect to see + ** more RFC2045 stuff, so create a nested RFC2045 structure, + ** and indicate that we expect to see headers. + */ + + if (p->content_type && + strcmp(p->content_type, "message/rfc822") == 0) + { + newp=append_part_noinherit(p, p->startbody); + newp->workinheader=1; + return; + } + + /* + ** If this is a multipart message (boundary defined), + ** create a RFC2045 structure for the pseudo-section + ** that precedes the first boundary line. + */ + + if (ContentBoundary(p)) + { + newp=append_part(p, p->startbody); + newp->workinheader=0; + newp->isdummy=1; + /* It's easier just to create it. 
*/ + return; + } + + if (rwp) + (*rwp->start_section)(rwp, p); + return; + } + + /* RFC822 header continues */ + + update_counts(p, p->endpos + cnt, p->endpos+n, 1); + + /* If this header line starts with a space, append one space + ** to the saved contents of the previous line, and append this + ** line to it. + */ + + if (isspace((int)(unsigned char)*c)) + { + rfc2045_add_buf(&p->header, &p->headersize, &p->headerlen, " ", 1); + } + else + { + /* Otherwise the previous header line is complete, so process it */ + + do_header(p); + p->headerlen=0; + } + + /* Save this line in the header buffer, because the next line + ** could be a continuation. + */ + + rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, c, n); +} + +/***********************************************************************/ + +/* + ** paste_tokens() - recombine an array of RFC822 tokens back as a string. + ** (Comments) are ignored. + ** Returns an emalloc-ed, 0-terminated string: atom tokens contribute their + ** text, every other token contributes its single token character. + ** NOTE(review): both for-loop headers below are syntactically incomplete in + ** this copy (text lost right after the loop initialiser); restore them from + ** the upstream file before building. + */ + +static char *paste_tokens(struct rfc822t *h, int start, int cnt) +{ + int l; + int i; + char *p; + + /* Calculate string size */ + + l=1; + for (i=0; itokens[start+i].token == '(') + continue; + + if (mailparse_rfc822_is_atom(h->tokens[start+i].token)) + l += h->tokens[start+i].len; + else + l++; + } + + /* Do it */ + + p=( char *)emalloc(l); + l=0; + + for (i=0; itokens[start+i].token == '(') + continue; + + if (mailparse_rfc822_is_atom(h->tokens[start+i].token)) + { + int l2=h->tokens[start+i].len; + + memcpy(p+l, h->tokens[start+i].ptr, l2); + l += l2; + } + else p[l++]=h->tokens[start+i].token; + } + p[l]=0; + return (p); +} + +/* Various permutations of the above, including forcing the string to + ** lowercase + ** NOTE(review): tolower() receives a plain char here, whereas doline() + ** casts to unsigned char before calling isspace(). + */ + +static char *lower_paste_tokens(struct rfc822t *h, int start, int cnt) +{ + char *p=paste_tokens(h, start, cnt); + char *q; + + for (q=p; q && *q; q++) + *q=tolower(*q); + return (p); +} + +/* Paste the single token at index i; returns 0 when i is at or past ntokens. */ static char *paste_token(struct rfc822t *h, int i) +{ + if (i >= h->ntokens) return (0); + return (paste_tokens(h, i, 1)); +} + +static char 
*lower_paste_token(struct rfc822t *h, int i) +{ + char *p=paste_token(h, i); + char *q; + + for (q=p; q && *q; q++) + *q=tolower(*q); + return (p); +} + +/* + do_header() - process completed RFC822 header. + Does nothing when no header text is buffered. Otherwise it 0-terminates and + tokenizes the buffered header, gives up if the first token has a non-zero + token code or the second token is not a colon, records the value under the + lower-cased header name in headerhash, and dispatches the MIME headers it + knows (mime-version and the content-* family) to their handlers. + */ + +static void mime_version(struct rfc2045 *, struct rfc822t *); +static void content_type(struct rfc2045 *, struct rfc822t *); +static void content_transfer_encoding(struct rfc2045 *, struct rfc822t *); +static void content_disposition(struct rfc2045 *, struct rfc822t *); +static void content_id(struct rfc2045 *, struct rfc822t *); +static void content_description(struct rfc2045 *, const char *); +static void content_language(struct rfc2045 *, const char *); +static void content_md5(struct rfc2045 *, const char *); +static void content_base(struct rfc2045 *, struct rfc822t *); +static void content_location(struct rfc2045 *, struct rfc822t *); + +static void do_header(struct rfc2045 *p) +{ + struct rfc822t *header; + char *t; + char * val; + + if (p->headerlen == 0) return; + rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, "", 1); + /* 0 terminate */ + + /* Parse the header line according to RFC822 */ + + header=mailparse_rfc822t_alloc(p->header, NULL); + + if (!header) return; /* Broken header */ + + if (header->ntokens < 2 || + header->tokens[0].token || + header->tokens[1].token != ':') + { + mailparse_rfc822t_free(header); + return; /* Broken header */ + } + + t=lower_paste_token(header, 0); + + + if (t != 0) { + + /* add the header to the hash, keyed by the lower-cased name, with the text after the colon (leading whitespace skipped) as the value. NOTE(review): isspace(*val) below lacks the unsigned char cast used by the other whitespace-skipping loops in this function. */ + val = strchr(p->header, ':'); + if (val) { + val++; + while(isspace(*val)) + val++; + add_assoc_string(p->headerhash, t, val, 1); + } + if (strcmp(t, "mime-version") == 0) + { + efree(t); + mime_version(p, header); + } + else if (strcmp(t, "content-type") == 0) + { + efree(t); + content_type(p, header); + } else if (strcmp(t, "content-transfer-encoding") == 0) + { + efree(t); + content_transfer_encoding(p, header); + } else if (strcmp(t, "content-disposition") == 0) + { + efree(t); + 
content_disposition(p, header); + } else if (strcmp(t, "content-id") == 0) + { + efree(t); + content_id(p, header); + } else if (strcmp(t, "content-description") == 0) + { + efree(t); + t=strchr(p->header, ':'); + if (t) ++t; + while (t && isspace((int)(unsigned char)*t)) + ++t; + content_description(p, t); + } else if (strcmp(t, "content-language") == 0) + { + efree(t); + t=strchr(p->header, ':'); + if (t) ++t; + while (t && isspace((int)(unsigned char)*t)) + ++t; + content_language(p, t); + } else if (strcmp(t, "content-base") == 0) + { + efree(t); + content_base(p, header); + } else if (strcmp(t, "content-location") == 0) + { + efree(t); + content_location(p, header); + } else if (strcmp(t, "content-md5") == 0) + { + efree(t); + t=strchr(p->header, ':'); + if (t) ++t; + while (t && isspace((int)(unsigned char)*t)) + ++t; + content_md5(p, t); + } + else efree(t); + } + mailparse_rfc822t_free(header); +} + +/* Mime-Version: and Content-Transfer-Encoding: headers are easy: the tokens + after the colon are pasted back together and stored, replacing any previous + value. The transfer encoding is lower-cased first, and a value of 8bit also + sets content_8bit. */ + +static void mime_version(struct rfc2045 *p, struct rfc822t *header) +{ + char *vers=paste_tokens(header, 2, header->ntokens-2); + + if (!vers) return; + + if (p->mime_version) efree(p->mime_version); + p->mime_version=vers; +} + +static void content_transfer_encoding(struct rfc2045 *r, + struct rfc822t *header) +{ + char *p; + + p=lower_paste_tokens(header, 2, header->ntokens-2); + if (!p) return; + + if (r->content_transfer_encoding) + efree(r->content_transfer_encoding); + r->content_transfer_encoding=p; + + if (strcmp(p, "8bit") == 0) + r->content_8bit=1; +} + +/* Dig into the content_type header. + Shared by Content-Type and Content-Disposition: init_token receives the + lower-cased text before the first semicolon, init_parameter is called once + per name=value parameter with the lower-cased name and the token range of + the value. */ + +static void parse_content_header(struct rfc2045 *r, struct rfc822t *header, + void (*init_token)(struct rfc2045 *, char *), + void (*init_parameter)(struct rfc2045 *, const char *, + struct rfc822t *, int, int)) +{ + int start; + int i, j; + char *p; + + /* Look for the 1st ; */ + + for (start=2; start < header->ntokens; start++) + if (header->tokens[start].token == ';') + break; + + /* 
Everything up to the 1st ; is the content type */ + + p=lower_paste_tokens(header, 2, start-2); + if (!p) return; + + (*init_token)(r, p); + if (start < header->ntokens) start++; + + /* Handle the remainder of the Content-Type: header */ + + while (start < header->ntokens) + { + /* Look for next ; (NOTE(review): the for-loop header below and the test in front of the Skip over comment further down are syntactically incomplete in this copy; restore them from the upstream file) */ + + for (i=start; intokens; i++) + if (header->tokens[i].token == ';') + break; + j=start; + if (j < i) + { + ++j; + + /* We only understand name=value (part of the original wording of this comment appears to have been lost) */ + + while (j < i && header->tokens[j].token == '(') + ++j; + if (j < i && header->tokens[j].token == '=') + { + ++j; + p=lower_paste_token(header, start); + if (!p) return; + (*init_parameter)(r, p, header, j, i-j); + efree(p); + } + } + if ( intokens ) ++i; /* Skip over ; */ + start=i; + } +} + +/* Dig into the content_type header: save_content_type takes ownership of the + pasted type string, save_content_type_parameter handles each parameter. */ + +static void save_content_type(struct rfc2045 *, char *); +static void save_content_type_parameter( struct rfc2045 *, const char *, + struct rfc822t *, int, int); + +static void content_type(struct rfc2045 *r, struct rfc822t *header) +{ + parse_content_header(r, header, &save_content_type, + &save_content_type_parameter); +} + +static void save_content_type(struct rfc2045 *r, char *content_type) +{ + if (r->content_type) efree(r->content_type); + r->content_type=content_type; +} + +static void save_content_type_parameter( + struct rfc2045 *r, const char *name, + struct rfc822t *header, int start, int len) +{ + char *p; + + p=strcmp(name, "charset") == 0 ? 
+ lower_paste_tokens(header, start, len): + paste_tokens(header, start, len); + if (!p) return; + + rfc2045_setattr(&r->content_type_attr, name, p); + efree(p); + + /* a boundary parameter is additionally cached, lower-cased, in the boundary field (the value ContentBoundary() reads) */ if (strcmp(name, "boundary") == 0) + { + if (r->boundary) + efree(r->boundary); + p=lower_paste_tokens(header, start, len); + r->boundary=p; + } +} + +/* Dig into content-disposition: same scheme as Content-Type, with every + parameter value stored as pasted (no lower-casing). */ + +static void save_content_disposition(struct rfc2045 *, char *); +static void save_content_disposition_parameter( struct rfc2045 *, const char *, + struct rfc822t *, int, int); + +static void content_disposition(struct rfc2045 *r, struct rfc822t *header) +{ + parse_content_header(r, header, &save_content_disposition, + &save_content_disposition_parameter); +} + +static void save_content_disposition(struct rfc2045 *r, + char *content_disposition) +{ + if (r->content_disposition) efree(r->content_disposition); + r->content_disposition=content_disposition; +} + +static void save_content_disposition_parameter( + struct rfc2045 *r, const char *name, + struct rfc822t *header, int start, int len) +{ + char *p; + + p=paste_tokens(header, start, len); + if (!p) return; + + rfc2045_setattr(&r->content_disposition_attr, name, p); + efree(p); +} + +/* Parse the start attribute of Content-Type as an address list and return the + first address found (the result of mailparse_rfc822_getaddr), or 0 when the + attribute is missing or empty or yields no address. + NOTE(review): the results of mailparse_rfc822t_alloc and + mailparse_rfc822a_alloc are used without a null check, unlike do_header which + checks the tokenizer result. The for-loop header is also syntactically + incomplete in this copy. */ char *rfc2045_related_start(const struct rfc2045 *p) +{ + const char *cb=rfc2045_getattr( p->content_type_attr, "start"); + struct rfc822t *t; + struct rfc822a *a; + int i; + + if (!cb || !*cb) return (0); + + t=mailparse_rfc822t_alloc(cb, 0); + a=mailparse_rfc822a_alloc(t); + for (i=0; inaddrs; i++) + if (a->addrs[i].tokens) + { + char *s=mailparse_rfc822_getaddr(a, i); + + mailparse_rfc822a_free(a); + mailparse_rfc822t_free(t); + return (s); + } + + mailparse_rfc822a_free(a); + mailparse_rfc822t_free(t); + return (0); +} + +/* Content-ID: store the first address parsed from the header tokens in content_id, replacing any previous value. NOTE(review): the for-loop header is syntactically incomplete in this copy. */ static void content_id(struct rfc2045 *p, struct rfc822t *t) +{ + struct rfc822a *a=mailparse_rfc822a_alloc(t); + int i; + + + for (i=0; inaddrs; i++) + if (a->addrs[i].tokens) + { + char *s=mailparse_rfc822_getaddr(a, i); + if (p->content_id) + 
efree(p->content_id); + p->content_id=s; + break; + } + + mailparse_rfc822a_free(a); +} + +/* content_description, content_language and content_md5 each copy a non-empty raw header value s into the matching field via set_string; a null or empty s leaves the field unchanged. */ static void content_description(struct rfc2045 *p, const char *s) +{ + if (s && *s) + set_string(&p->content_description, s); +} + +static void content_language(struct rfc2045 *p, const char *s) +{ + if (s && *s) + set_string(&p->content_language, s); +} + +static void content_md5(struct rfc2045 *p, const char *s) +{ + if (s && *s) + set_string(&p->content_md5, s); +} + +/* Content-Base: retag every double-quote token as 0 (presumably so quoted text is pasted with its contents -- confirm against mailparse_rfc822_is_atom), then store the pasted value. + NOTE(review): set_string() stores an estrdup copy of its argument, so the buffer returned by paste_tokens() in s is never freed here -- this looks like a leak. The for-loop header is also syntactically incomplete in this copy. */ static void content_base(struct rfc2045 *p, struct rfc822t *t) +{ + char *s; + int i; + + for (i=0; intokens; i++) + if (t->tokens[i].token == '"') + t->tokens[i].token=0; + + s=paste_tokens(t, 2, t->ntokens-2); + set_string(&p->content_base, s); +} + +/* Content-Location: same handling as content_base, stored in content_location. + NOTE(review): as in content_base, the paste_tokens() result in s is copied by set_string() and never freed, and the for-loop header is incomplete in this copy. */ static void content_location(struct rfc2045 *p, struct rfc822t *t) +{ + char *s; + int i; + + for (i=0; intokens; i++) + if (t->tokens[i].token == '"') + t->tokens[i].token=0; + + s=paste_tokens(t, 2, t->ntokens-2); + set_string(&p->content_location, s); +} + +/* -------------------- */ + +#define GETINFO(s, def) ( (s) && (*s) ? 
(s):def) + +/* Report content type, transfer encoding and charset for p, substituting text/plain, 8bit and the module default charset (MAILPARSEG(def_charset)) when the part has none. The returned pointers alias strings owned by p, string constants, or the module globals; nothing is copied. */ void rfc2045_mimeinfo(const struct rfc2045 *p, + const char **content_type_s, + const char **content_transfer_encoding_s, + const char **charset_s) +{ + const char *c; + MAILPARSELS_FETCH(); + + *content_type_s=GETINFO(p->content_type, "text/plain"); + *content_transfer_encoding_s=GETINFO(p->content_transfer_encoding, + "8bit"); + + c=rfc2045_getattr(p->content_type_attr, "charset"); + if (!c) + c = MAILPARSEG(def_charset); + + *charset_s=c; +} + +/* Return the boundary attribute of Content-Type, or an empty string when it is absent. */ const char *rfc2045_boundary(const struct rfc2045 *p) +{ + const char *cb=rfc2045_getattr( p->content_type_attr, "boundary"); + + if (!cb) cb=""; + return (cb); +} + +/* Report the disposition and its name and filename attributes. Unlike the accessors that follow, missing values come back as null pointers rather than empty strings. */ void rfc2045_dispositioninfo(const struct rfc2045 *p, + const char **disposition_s, + const char **disposition_name_s, + const char **disposition_filename_s) +{ + *disposition_s=p->content_disposition; + *disposition_name_s=rfc2045_getattr(p->content_disposition_attr, + "name"); + *disposition_filename_s=rfc2045_getattr(p->content_disposition_attr, + "filename"); +} + +/* The accessors from here through rfc2045_content_md5 return the stored string, or an empty string when it is unset. */ const char *rfc2045_contentname(const struct rfc2045 *p) +{ + const char *q=rfc2045_getattr(p->content_type_attr, "name"); + + if (!q) q=""; + return (q); +} + +const char *rfc2045_content_id(const struct rfc2045 *p) +{ + return (p->content_id ? p->content_id:""); +} + +const char *rfc2045_content_description(const struct rfc2045 *p) +{ + return (p->content_description ? p->content_description:""); +} + +const char *rfc2045_content_language(const struct rfc2045 *p) +{ + return (p->content_language ? p->content_language:""); +} + +const char *rfc2045_content_md5(const struct rfc2045 *p) +{ + return (p->content_md5 ? 
p->content_md5:""); +} + +/* Copy the byte offsets and line counts of p into the out-parameters. For a part that has a parent, end_pos is taken from endbody and each non-zero line count is reduced by one. */ void rfc2045_mimepos(const struct rfc2045 *p, + off_t *start_pos, off_t *end_pos, off_t *start_body, + off_t *nlines, off_t *nbodylines) +{ + *start_pos=p->startpos; + *end_pos=p->endpos; + + *nlines=p->nlines; + *nbodylines=p->nbodylines; + if (p->parent) /* MIME parts do not have the trailing CRLF */ + { + *end_pos=p->endbody; + if (*nlines) --*nlines; + if (*nbodylines) --*nbodylines; + } + *start_body=p->startbody; +} + +/* Count the direct children of p (the length of the firstpart list). */ unsigned rfc2045_mimepartcount(const struct rfc2045 *p) +{ + const struct rfc2045 *q; + unsigned n=0; + + for (q=p->firstpart; q; q=q->next) ++n; + return (n); +} +/* + * vim:tw=78 sw=4 ts=4 + * */ diff --git a/ext/mailparse/rfc2045.h b/ext/mailparse/rfc2045.h new file mode 100755 index 0000000000..30d93e6bb8 --- /dev/null +++ b/ext/mailparse/rfc2045.h @@ -0,0 +1,202 @@ +/* +** Copyright 1998 - 2000 Double Precision, Inc. See COPYING for +** distribution information. +*/ + +/* +** $Id$ +*/ +#ifndef rfc2045_h +#define rfc2045_h + +#include "php_mailparse.h" +#include "ext/mbstring/mbfilter.h" + +#define RFC2045CHARSET "us-ascii" +#define RFC2045MIMEMSG "This is a MIME-formatted message.\n" + + +#ifdef __cplusplus +extern "C" { +#endif + +/* RFC2045_ISMIME1 is true only for a non-null version string whose atoi() value is 1; RFC2045_ISMIME1DEF additionally accepts a null pointer. */ #define RFC2045_ISMIME1(p) ((p) && atoi(p) == 1) +#define RFC2045_ISMIME1DEF(p) (!(p) || atoi(p) == 1) + +struct rfc2045; + +/* callback for de/encoding */ +typedef int (*rfc2045_decode_user_func_t)(const char *p, size_t n, void *ptr); +typedef int (*rfc2045_decode_func_t)(struct rfc2045 * part, const char * buf, size_t n); + +/* the attributes of a given header: a singly linked list of name/value strings (see rfc2045_getattr and rfc2045_setattr) */ +struct rfc2045attr { + struct rfc2045attr *next; + char *name; + char *value; +}; + +/* One node of the parsed message tree: the message itself or one of its parts. */ struct rfc2045 { + struct rfc2045 *parent; + unsigned pindex; + struct rfc2045 *next; + + off_t startpos, /* At which offset in msg this section starts */ + endpos, /* Where it ends */ + startbody, /* Where the body of the msg starts */ + endbody; /* endpos - trailing CRLF terminator */ + off_t nlines; /* Number of lines in message */ + 
off_t nbodylines; /* Number of lines only in the body */ + char *mime_version; + char *content_type; + struct rfc2045attr *content_type_attr; /* Content-Type: attributes */ + + char *content_disposition; + char *boundary; /* lower-cased copy of the boundary parameter; this is what ContentBoundary() in rfc2045.c reads */ + struct rfc2045attr *content_disposition_attr; + char *content_transfer_encoding; + /* Set if content_transfer_encoding is 8bit */ + int content_8bit; + char *content_id; + char *content_description; + char *content_language; + char *content_md5; + char *content_base; + char *content_location; + struct rfc2045ac *rfc2045acptr; + int has8bitchars; /* For rewriting */ + int haslongline; /* For rewriting */ + unsigned rfcviolation; /* Boo-boos */ + +#define RFC2045_ERR8BITHEADER 1 /* 8 bit characters in headers */ +#define RFC2045_ERR8BITCONTENT 2 /* 8 bit contents, but no 8bit content-transfer-encoding */ +#define RFC2045_ERR2COMPLEX 4 /* Too many nested contents */ +#define RFC2045_ERRNOMIMEVERSION 8 /* missing Mime-Version header, but boundary set in content type */ + unsigned numparts; /* # of parts allocated */ + + char *rw_transfer_encoding; /* For rewriting */ + +#define RFC2045_RW_7BIT 1 +#define RFC2045_RW_8BIT 2 + + /* Subsections */ + + struct rfc2045 *firstpart, *lastpart; + + /* Working area */ + + char *workbuf; + size_t workbufsize; + size_t workbuflen; + int workinheader; /* non-zero while header lines of this part are being read */ + int workclosed; /* set once the terminating boundary has been seen */ + int isdummy; /* pseudo-section that precedes the first boundary line */ + int informdata; /* In a middle of a long form-data part */ + char *header; + size_t headersize; + size_t headerlen; + + zval * headerhash; /* a record of all of the headers */ + + /* decoding filter to use */ + mbfl_convert_filter * decode_filter; + /* "user" function to accept the decoding output */ + rfc2045_decode_user_func_t udecode_func; + /* this is passed as the last param to the user decode func */ + void *misc_decode_ptr; +} ; + + +struct rfc2045 *rfc2045_alloc(); +void rfc2045_parse(struct rfc2045 *, const char *, size_t); +void rfc2045_free(struct rfc2045 *); + + +const char *rfc2045_contentname(const struct rfc2045 *); 
+/* rfc2045_mimeinfo out-parameters, in order: content type, content transfer encoding, charset (see the definition in rfc2045.c). */ void rfc2045_mimeinfo(const struct rfc2045 *, + const char **, + const char **, + const char **); +const char *rfc2045_boundary(const struct rfc2045 *); +char *rfc2045_related_start(const struct rfc2045 *); +const char *rfc2045_content_id(const struct rfc2045 *); +const char *rfc2045_content_description(const struct rfc2045 *); +const char *rfc2045_content_language(const struct rfc2045 *); +const char *rfc2045_content_md5(const struct rfc2045 *); + +void rfc2045_dispositioninfo(const struct rfc2045 *, + const char **, + const char **, + const char **); + +void rfc2045_mimepos(const struct rfc2045 *, off_t *, off_t *, off_t *, + off_t *, off_t *); +unsigned rfc2045_mimepartcount(const struct rfc2045 *); + +struct rfc2045id { + struct rfc2045id *next; + int idnum; +} ; + +void rfc2045_decode(struct rfc2045 *, + void (*)(struct rfc2045 *, struct rfc2045id *, void *), + void *); + +struct rfc2045 *rfc2045_find(struct rfc2045 *, const char *); + + + + +/* begin an en/decoding process */ +void rfc2045_cdecode_start(struct rfc2045 *, rfc2045_decode_user_func_t cb, void *); +int rfc2045_cdecode(struct rfc2045 *, const char *, size_t); +int rfc2045_cdecode_end(struct rfc2045 *); + +/* Callback table reached through the rfc2045acptr field; the parser in rfc2045.c invokes start_section, section_contents and end_section from doline() and rfc2045_parse(). */ struct rfc2045ac { + void (*start_section)(struct rfc2045ac *, struct rfc2045 *); + void (*section_contents)(struct rfc2045ac *, const char *, size_t); + void (*end_section)(struct rfc2045ac *); + /* private vars used in acprep */ + int curlinepos; + struct rfc2045 *currwp; + enum { + raw, + quotedprint, + qpseeneq, + qpseeneqh, + base64 + } curstate; + int statechar; + + +}; + +struct rfc2045 *rfc2045_alloc_ac(); +int rfc2045_ac_check(struct rfc2045 *, int); +int rfc2045_rewrite(struct rfc2045 *, int, int, const char *); +int rfc2045_rewrite_func(struct rfc2045 *p, int, + int (*)(const char *, int, void *), void *, + const char *); + +/* Internal functions */ + +int rfc2045_try_boundary(struct rfc2045 *, int, const char *); +char *rfc2045_mk_boundary(struct rfc2045 *, int); +const char 
*rfc2045_getattr(const struct rfc2045attr *, const char *); +void rfc2045_setattr(struct rfc2045attr **, const char *, const char *); + +/* MIME content base/location */ + +char *rfc2045_content_base(struct rfc2045 *p); + /* This joins Content-Base: and Content-Location:, as best as I + ** can figure it out. + */ + +char *rfc2045_append_url(const char *, const char *); + /* Do this with two arbitrary URLs */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/ext/mailparse/rfc2045acchk.c b/ext/mailparse/rfc2045acchk.c new file mode 100644 index 0000000000..8fcd16dbaf --- /dev/null +++ b/ext/mailparse/rfc2045acchk.c @@ -0,0 +1,123 @@ +/* $Id$ */ +/* +** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for +** distribution information. +*/ +#include "php.h" +#include "php_mailparse.h" + + +/* Recursively decide whether p or any non-dummy child should be rewritten for rwmode (compared against RFC2045_RW_7BIT and RFC2045_RW_8BIT). Along the way it fills in a default content type, charset, transfer encoding and mime_version where they are missing, records the suggested encoding in rw_transfer_encoding, and returns non-zero when a rewrite is suggested. + NOTE(review): the child loop passes the content_transfer_encoding of each child to strcmp. The recursive call only fills that field when the child satisfies RFC2045_ISMIME1DEF, so a child carrying some other Mime-Version may still hold a null pointer there -- confirm. */ int rfc2045_ac_check(struct rfc2045 *p, int rwmode) +{ + int flag=0; /* Flag - rewriting suggested */ + struct rfc2045 *c; + int hasnon7bit=p->has8bitchars; + /* hasnon7bit: 8bit chars in this section or subsections */ + const char *te; + int is8bitte; + MAILPARSELS_FETCH(); + + for (c=p->firstpart; c; c=c->next) + if (!c->isdummy) + { + if (rfc2045_ac_check(c, rwmode)) flag=1; + if (strcmp(c->content_transfer_encoding, "7bit") && + strcmp(c->content_transfer_encoding, "quoted-printable")) + hasnon7bit=1; + if (c->has8bitchars) + p->has8bitchars=1; + } + + if (RFC2045_ISMIME1DEF(p->mime_version) && !p->content_type) + { + p->content_type = estrdup("text/plain"); + if (p->mime_version) + { + flag=1; + } + } + + if (RFC2045_ISMIME1DEF(p->mime_version) + && !rfc2045_getattr(p->content_type_attr, "charset") + && strncasecmp(p->content_type, "text/", 5) == 0) + { + rfc2045_setattr(&p->content_type_attr, "charset", + MAILPARSEG(def_charset)); + + if (p->mime_version + + && p->firstpart == 0 /* sam - don't trigger rewrites on changes to multipart headers */ + + ) + { + flag=1; + } + } + + if (RFC2045_ISMIME1DEF(p->mime_version) + && !p->content_transfer_encoding) + { + 
p->content_transfer_encoding = estrdup(hasnon7bit ? "8bit":"7bit"); + if (p->mime_version + + && p->firstpart == 0 /* sam - don't trigger rewrites on changes to multipart headers */ + ) + { + flag=1; + } + } + +#if 0 + if (RFC2045_ISMIME1DEF(p->mime_version) + && strncmp(p->content_type, "text/", 5) == 0 && !hasnon7bit + && strcmp(p->content_transfer_encoding, "7bit")) + { + if (p->mime_version) + { + flag=1; + } + } +#endif + + if (RFC2045_ISMIME1DEF(p->mime_version)) + { + /* Check for conversions: is8bitte is non-zero when the declared encoding is none of base64, quoted-printable or 7bit */ + + te=p->content_transfer_encoding; + is8bitte=strcasecmp(te, "base64") && + strcasecmp(te, "quoted-printable") && + strcasecmp(te, "7bit"); /* 8 bit contents */ + + if (is8bitte && !p->has8bitchars && !p->haslongline) + { + if (p->rw_transfer_encoding) + efree(p->rw_transfer_encoding); + p->rw_transfer_encoding=estrdup("7bit"); + flag=1; + is8bitte=0; + } + + if (rwmode == RFC2045_RW_7BIT && (is8bitte || p->haslongline)) + { + if (p->rw_transfer_encoding) + efree(p->rw_transfer_encoding); + p->rw_transfer_encoding=estrdup("quoted-printable"); + flag=1; + } + else if (rwmode == RFC2045_RW_8BIT && + strcasecmp(te, "quoted-printable") == 0 && + !p->haslongline) + { + if (p->rw_transfer_encoding) + efree(p->rw_transfer_encoding); + p->rw_transfer_encoding=estrdup(hasnon7bit ? "8bit":"7bit"); + flag=1; + } + } + + if (!p->mime_version) + { + p->mime_version = estrdup("1.0"); + } + return (flag); +} diff --git a/ext/mailparse/rfc2045acprep.c b/ext/mailparse/rfc2045acprep.c new file mode 100644 index 0000000000..7f625b57c8 --- /dev/null +++ b/ext/mailparse/rfc2045acprep.c @@ -0,0 +1,99 @@ +/* $Id$ */ + +/* +** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for +** distribution information. 
+*/ +#include "php.h" +#include "php_mailparse.h" + + +static void start_rwprep(struct rfc2045ac *, struct rfc2045 *); +static void do_rwprep(struct rfc2045ac *, const char *, size_t); +static void end_rwprep(struct rfc2045ac *); + +static struct rfc2045ac rfc2045acprep={ + &start_rwprep, + &do_rwprep, + &end_rwprep}; + +#define h2nyb(c) ( (c) >= 'a' && (c) <= 'f' ? (c)-('a'-10): \ + (c) >= 'A' && (c) <= 'F' ? (c)-('A'-10): (c)-'0') + +struct rfc2045 *rfc2045_alloc_ac() +{ + struct rfc2045 *p=rfc2045_alloc(); + + if (p) + { + p->rfc2045acptr = emalloc(sizeof(struct rfc2045ac)); + memcpy(p->rfc2045acptr, &rfc2045acprep, sizeof(struct rfc2045ac)); + p->rfc2045acptr->curlinepos = 0; + p->rfc2045acptr->currwp = NULL; + } + return (p); +} + + +static void start_rwprep(struct rfc2045ac * this_ptr, struct rfc2045 *p) +{ + this_ptr->currwp = p; + this_ptr->curlinepos=0; + this_ptr->curstate=raw; + if (p->content_transfer_encoding) + { + if (strcmp(p->content_transfer_encoding, + "quoted-printable") == 0) + this_ptr->curstate = quotedprint; + else if (strcmp(p->content_transfer_encoding, "base64") == 0) + this_ptr->curstate = base64; + } +} + +static void do_rwprep(struct rfc2045ac * this_ptr, const char * p, size_t n) +{ + if (!this_ptr->currwp) + return; + for ( ; n; --n, ++p) + switch (this_ptr->curstate) { + case quotedprint: + if (*p == '=') + { + this_ptr->curstate = qpseeneq; + continue; + } + /* FALLTHRU */ + case raw: + if (*p == '\r' || *p == '\n') + this_ptr->curlinepos = 0; + else if (++this_ptr->curlinepos > 500) + this_ptr->currwp->haslongline = 1; + if ((unsigned char)*p >= 127) + this_ptr->currwp->has8bitchars = 1; + break; + case qpseeneq: + if (*p == '\n') + { + this_ptr->curstate = quotedprint; + continue; + } + if (isspace((int)(unsigned char)*p)) continue; /* Ignore WSP */ + this_ptr->statechar = *p; + this_ptr->curstate = qpseeneqh; + continue; + case qpseeneqh: + this_ptr->curstate = quotedprint; + if ( (unsigned char) + ( (h2nyb(this_ptr->statechar) 
<< 4) + h2nyb(*p) ) >= 127 + ) this_ptr->currwp->has8bitchars=1; + if (++this_ptr->curlinepos > 500) + this_ptr->currwp->haslongline=1; + continue; + case base64: + break; + } +} + +static void end_rwprep(struct rfc2045ac * this_ptr) +{ +} diff --git a/ext/mailparse/rfc2045appendurl.c b/ext/mailparse/rfc2045appendurl.c new file mode 100644 index 0000000000..231fbfe985 --- /dev/null +++ b/ext/mailparse/rfc2045appendurl.c @@ -0,0 +1,130 @@ +/* + +----------------------------------------------------------------------+ + | PHP version 4.0 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997, 1998, 1999, 2000, 2001 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 2.02 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available at through the world-wide-web at | + | http://www.php.net/license/2_02.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: | + | Wez Furlong (wez@thebrainroom.com) | + +----------------------------------------------------------------------+ + */ +/* $Id$ */ + +/* +** Copyright 2000 Double Precision, Inc. See COPYING for +** distribution information. +*/ + +#include "php.h" +#include "php_mailparse.h" + +/* +** --------------------------------------------------------------------- +** Attempt to parse Content-Base: and Content-Location:, and return the +** "base" of all the relative URLs in the section. 
+** --------------------------------------------------------------------- +*/ + +static void get_method_path(const char *p, + const char **method, + unsigned *methodl, + const char **path) +{ + unsigned i; + + for (i=0; p && p[i]; i++) + { + if (p[i] == ':') + { + *method=p; + *methodl= ++i; + *path=p+i; + return; + } + + if (!isalpha( (int)(unsigned char)p[i])) + break; + } + + *method=0; + *methodl=0; + *path=p; +} + +char *rfc2045_append_url(const char *base, const char *loc) +{ + const char *base_method; + unsigned base_method_l; + const char *base_path; + + const char *loc_method; + unsigned loc_method_l; + const char *loc_path; + char *buf, *q; + + get_method_path(base, &base_method, &base_method_l, &base_path); + get_method_path(loc, &loc_method, &loc_method_l, &loc_path); + + if (loc_method_l) + { + buf = emalloc(strlen(loc)+1); + strcpy(buf, loc); + return (buf); + } + + loc_method = base_method; + loc_method_l = base_method_l; + + if (!base_path) base_path = ""; + if (!loc_path) loc_path = ""; + + buf = emalloc(loc_method_l + strlen(base_path)+strlen(loc_path) + 3); + + if (loc_method_l) + memcpy(buf, loc_method, loc_method_l); + buf[loc_method_l] = 0; + + q=buf + loc_method_l; + + strcat(strcpy(q, base_path), "/"); + + if ( loc_path[0] == '/') + { + char *r; + + if (loc_path[1] == '/') { + *q=0; /* Location is absolute */ + } + else if ( q[0] == '/' && q[1] == '/' && (r=strchr(q+2, '/')) != 0) { + *r=0; /* Relative to top of base */ + } + else { + *q=0; /* No sys in base, just start with / */ + } + } + + strcat(q, loc_path); + + return (buf); +} + +char *rfc2045_content_base(struct rfc2045 *p) +{ + return (rfc2045_append_url(p->content_base, p->content_location)); +} + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim: tw=78 ts=4 sw=4 + */ diff --git a/ext/mailparse/rfc2045cdecode.c b/ext/mailparse/rfc2045cdecode.c new file mode 100755 index 0000000000..971c59d77b --- /dev/null +++ b/ext/mailparse/rfc2045cdecode.c @@ -0,0 
+1,83 @@
+/* $Id$ */
+/*
+ ** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for
+ ** distribution information.
+ */
+
+#include "php.h"
+#include "php_mailparse.h"
+
+
+static int op_func(int c, void *dat)	/* filter output hook: forwards one decoded byte to the user callback */
+{
+	unsigned char C = (unsigned char)c;
+	struct rfc2045 * p = (struct rfc2045*)dat;
+
+	(*p->udecode_func)(&C, 1, p->misc_decode_ptr);
+
+	return c;
+}
+
+void rfc2045_cdecode_start(struct rfc2045 *p,
+				rfc2045_decode_user_func_t u,
+				void *miscptr)
+{
+	enum mbfl_no_encoding from = mbfl_no_encoding_8bit;
+
+	if (p->content_transfer_encoding)
+	{
+		from = mbfl_name2no_encoding(p->content_transfer_encoding);
+		if (from == mbfl_no_encoding_invalid)	{
+			zend_error(E_WARNING, "%s(): I don't know how to decode %s transfer encoding!",
+					get_active_function_name(),
+					p->content_transfer_encoding);
+			from = mbfl_no_encoding_8bit;	/* unknown encoding: fall back to pass-through */
+		}
+	}
+
+	p->misc_decode_ptr=miscptr;
+	p->udecode_func=u;
+	p->workbuflen=0;
+
+	if (from == mbfl_no_encoding_8bit)
+		p->decode_filter = NULL;	/* NULL filter means rfc2045_cdecode() hands data straight to u */
+	else
+		p->decode_filter = mbfl_convert_filter_new(
+				from, mbfl_no_encoding_8bit,
+				op_func,
+				NULL,
+				p
+				);
+}
+
+int rfc2045_cdecode_end(struct rfc2045 *p)	/* flush and release the filter, if one was created; always returns 0 */
+{
+	if (p->decode_filter)
+	{
+		mbfl_convert_filter_flush(p->decode_filter);
+		mbfl_convert_filter_delete(p->decode_filter);
+		p->decode_filter = NULL;
+	}
+	return 0;
+}
+
+int rfc2045_cdecode(struct rfc2045 *p, const char *s, size_t l)	/* pass l bytes at s to the decoder prepared by rfc2045_cdecode_start() */
+{
+	if (s && l)
+	{
+		int i;
+
+		if (p->decode_filter)
+		{
+			for (i=0; i<l; i++)	/* feed the filter one byte at a time; output arrives via op_func */
+			{
+				if (mbfl_convert_filter_feed(s[i], p->decode_filter) < 0)
+					return -1;	/* feed returned a negative value */
+			}
+		}
+		else	/* no filter installed: give the raw bytes to the user callback and return its result */
+			return ((*p->udecode_func)(s,l,p->misc_decode_ptr));
+
+	}
+	return (0);
+}
diff --git a/ext/mailparse/rfc2045decode.c b/ext/mailparse/rfc2045decode.c
new file mode 100644
index 0000000000..1c066e3a99
--- /dev/null
+++ b/ext/mailparse/rfc2045decode.c
@@ -0,0 +1,40 @@
+/* $Id$ */
+/*
+** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for
+** distribution information. 
+*/ + +#include "php.h" +#include "php_mailparse.h" + +static void decode(struct rfc2045id *topid, + struct rfc2045id **childidptr, + struct rfc2045 *r, + void (*func)(struct rfc2045 *, struct rfc2045id *, void *), + void *ptr) +{ +struct rfc2045id nextid; + + *childidptr=0; + (*func)(r, topid, ptr); + *childidptr=&nextid; + nextid.idnum=1; + if (r->content_type && strncmp(r->content_type, "multipart/", 10) == 0) + nextid.idnum=0; + for (r=r->firstpart; r; r=r->next) + { + if (nextid.idnum) + decode(topid, &nextid.next, r, func, ptr); + ++nextid.idnum; + } +} + +void rfc2045_decode(struct rfc2045 *p, + void (*func)(struct rfc2045 *, struct rfc2045id *, void *), + void *ptr) +{ +struct rfc2045id topid; + + topid.idnum=1; + decode(&topid, &topid.next, p, func, ptr); +} diff --git a/ext/mailparse/rfc2045find.c b/ext/mailparse/rfc2045find.c new file mode 100644 index 0000000000..ab93c2be4a --- /dev/null +++ b/ext/mailparse/rfc2045find.c @@ -0,0 +1,47 @@ +/* $Id$ */ +/* +** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for +** distribution information. 
+*/ + +#include "php.h" +#include "php_mailparse.h" + +struct rfc2045findstruct { + const char *partnum; + struct rfc2045 *ptr; +} ; + +static void do_decode(struct rfc2045 *p, struct rfc2045id *id, void *ptr) +{ + struct rfc2045findstruct *fs=(struct rfc2045findstruct *)ptr; + const char *partnum=fs->partnum; + unsigned n; + + while (id) + { + if (!isdigit((int)(unsigned char)*partnum)) return; + n=0; + while (isdigit((int)(unsigned char)*partnum)) + n=n*10 + *partnum++ - '0'; + if (*partnum) + { + if (*partnum != '.') return; + ++partnum; + } + if (n != (unsigned)id->idnum) return; + id=id->next; + } + if ( *partnum == '\0') fs->ptr=p; +} + + +struct rfc2045 *rfc2045_find(struct rfc2045 *p, const char *str) +{ + struct rfc2045findstruct fs; + + fs.partnum=str; + fs.ptr=0; + rfc2045_decode(p, &do_decode, &fs); + return (fs.ptr); +} diff --git a/ext/mailparse/rfc822.c b/ext/mailparse/rfc822.c new file mode 100755 index 0000000000..312bf66cc7 --- /dev/null +++ b/ext/mailparse/rfc822.c @@ -0,0 +1,716 @@ +/* $Id$ */ +/* + ** Copyright 1998 - 1999 Double Precision, Inc. + ** See COPYING for distribution information. 
+ */ + +#include "php.h" +#include "php_mailparse.h" + +static void tokenize(const char *p, struct rfc822token *tokp, int *toklen, + void (*err_func)(const char *, int)) +{ + const char *addr=p; + int i=0; + int inbracket=0; + + *toklen=0; + while (*p) + { + if (isspace((int)(unsigned char)*p)) + { + p++; + i++; + continue; + } + + switch (*p) { + int level; + + case '(': + if (tokp) + { + tokp->token='('; + tokp->ptr=p; + tokp->len=0; + } + level=0; + for (;;) + { + if (!*p) + { + if (err_func) (*err_func)(addr, i); + if (tokp) tokp->token='"'; + ++*toklen; + return; + } + if (*p == '(') + ++level; + if (*p == ')' && --level == 0) + { + p++; + i++; + if (tokp) tokp->len++; + break; + } + if (*p == '\\' && p[1]) + { + p++; + i++; + if (tokp) tokp->len++; + } + + i++; + if (tokp) tokp->len++; + p++; + } + if (tokp) ++tokp; + ++*toklen; + continue; + + case '"': + p++; + i++; + + if (tokp) + { + tokp->token='"'; + tokp->ptr=p; + } + while (*p != '"') + { + if (!*p) + { + if (err_func) (*err_func)(addr, i); + ++*toklen; + return; + } + if (*p == '\\' && p[1]) + { + if (tokp) tokp->len++; + p++; + i++; + } + if (tokp) tokp->len++; + p++; + i++; + } + ++*toklen; + if (tokp) ++tokp; + p++; + i++; + continue; + case '\\': + case ')': + if (err_func) (*err_func)(addr, i); + ++p; + ++i; + continue; + case '<': + case '>': + case '@': + case ',': + case ';': + case ':': + case '.': + case '[': + case ']': + case '%': + case '!': + case '?': + case '=': + case '/': + + if ( (*p == '<' && inbracket) || + (*p == '>' && !inbracket)) + { + if (err_func) (*err_func)(addr, i); + ++p; + ++i; + continue; + } + + if (*p == '<') + inbracket=1; + + if (*p == '>') + inbracket=0; + + if (tokp) + { + tokp->token= *p; + tokp->ptr=p; + tokp->len=1; + ++tokp; + } + ++*toklen; + + if (*p == '<' && p[1] == '>') + /* Fake a null address */ + { + if (tokp) + { + tokp->token=0; + tokp->ptr=""; + tokp->len=0; + ++tokp; + } + ++*toklen; + } + ++p; + ++i; + continue; + default: + + if (tokp) + { + 
tokp->token=0; + tokp->ptr=p; + tokp->len=0; + } + while (*p && !isspace((int)(unsigned char)*p) && strchr( + "<>@,;:.[]()%!\"\\?=/", *p) == 0) + { + if (tokp) ++tokp->len; + ++p; + ++i; + } + if (i == 0) /* Idiot check */ + { + if (err_func) (*err_func)(addr, i); + if (tokp) + { + tokp->token='"'; + tokp->ptr=p; + tokp->len=1; + ++tokp; + } + ++*toklen; + ++p; + ++i; + continue; + } + if (tokp) ++tokp; + ++*toklen; + } + } +} + +static void parseaddr(struct rfc822token *tokens, int ntokens, + struct rfc822addr *addrs, int *naddrs) +{ + int flag, j, k; + struct rfc822token save_token; + + *naddrs=0; + + while (ntokens) + { + int i; + + /* atoms (token=0) or quoted strings, followed by a : token + is a list name. */ + + for (i=0; itokens=0; + addrs->name=i ? tokens:0; + for (j=1; jname[j-1].next=addrs->name+j; + if (i) + addrs->name[i-1].next=0; + addrs++; + } + ++*naddrs; + tokens += i; + ntokens -= i; + continue; /* Group=phrase ":" */ + } + + /* Spurious commas are skipped, ;s are recorded */ + + if (tokens->token == ',' || tokens->token == ';') + { + if (tokens->token == ';') + { + if (addrs) + { + addrs->tokens=0; + addrs->name=tokens; + addrs->name->next=0; + addrs++; + } + ++*naddrs; + } + ++tokens; + --ntokens; + continue; + } + + /* If we can find a '<' before the next comma or semicolon, + we have new style RFC path address */ + + for (i=0; i'??? + If it consists exclusively of atoms, leave them alone. + Else, make them all a quoted string. */ + + for (j=0; jname= i ? tokens:0; + for (k=1; kname[k-1].next=addrs->name+k; + if (i) + addrs->name[i-1].next=0; + } + } + else /* Intentionally corrupt the original toks */ + { + if (addrs) + { + tokens->len= tokens[i-1].ptr + + tokens[i-1].len + - tokens->ptr; + /* We know that all the ptrs point + to parts of the same string. */ + tokens->token='"'; + /* Quoted string. 
*/ + addrs->name=tokens; + addrs->name->next=0; + } + } + + /* Any comments in the name part are changed to quotes */ + + if (addrs) + { + struct rfc822token *t; + + for (t=addrs->name; t; t=t->next) + if (t->token == '(') + t->token='"'; + } + + /* Now that's done and over with, see what can + be done with the <...> part. */ + + ++i; + tokens += i; + ntokens -= i; + for (i=0; itokens=i ? tokens:0; + for (k=1; ktokens[k-1].next=addrs->tokens+k; + if (i) + addrs->tokens[i-1].next=0; + ++addrs; + } + ++*naddrs; + tokens += i; + ntokens -= i; + if (ntokens) /* Skip the '>' token */ + { + --ntokens; + ++tokens; + } + continue; + } + + /* Ok - old style address. Assume the worst */ + + /* Try to figure out where the address ends. It ends upon: + a comma, semicolon, or two consecutive atoms. */ + + flag=0; + for (i=0; iname=0; + } + + /* Ok, now get rid of embedded comments in the address. + Consider the last comment to be the real name */ + + if (addrs) + { + + save_token.ptr=0; + save_token.len=0; + + for (j=k=0; jname=tokens+i-1; + addrs->name->next=0; + } + addrs->tokens=k ? 
tokens:NULL; + for (j=1; jtokens[j-1].next=addrs->tokens+j; + if (k) + addrs->tokens[k-1].next=0; + ++addrs; + } + ++*naddrs; + tokens += i; + ntokens -= i; + } +} + +static void print_token(const struct rfc822token *token, + void (*print_func)(char, void *), void *ptr) +{ + const char *p; + int n; + + if (token->token == 0 || token->token == '(') + { + for (n=token->len, p=token->ptr; n; --n, ++p) + (*print_func)(*p, ptr); + return; + } + + if (token->token != '"') + { + (*print_func)(token->token, ptr); + return; + } + + (*print_func)('"', ptr); + n=token->len; + p=token->ptr; + while (n) + { + if (*p == '"' || (*p == '\\' && n == 1)) (*print_func)('\\', ptr); + if (*p == '\\' && n > 1) + { + (*print_func)('\\', ptr); + ++p; + --n; + } + (*print_func)(*p++, ptr); + --n; + } + (*print_func)('"', ptr); +} + +void mailparse_rfc822tok_print(const struct rfc822token *token, + void (*print_func)(char, void *), void *ptr) +{ + int prev_isatom=0; + int isatom; + + while (token) + { + isatom=mailparse_rfc822_is_atom(token->token); + if (prev_isatom && isatom) + (*print_func)(' ', ptr); + print_token(token, print_func, ptr); + prev_isatom=isatom; + token=token->next; + } +} + +void mailparse_rfc822_print(const struct rfc822a *rfcp, void (*print_func)(char, void *), + void (*print_separator)(const char *s, void *), void *ptr) +{ + mailparse_rfc822_print_common(rfcp, 0, 0, print_func, print_separator, ptr); +} + +void mailparse_rfc822_print_common(const struct rfc822a *rfcp, + char *(*decode_func)(const char *, const char *), const char *chset, + void (*print_func)(char, void *), + void (*print_separator)(const char *, void *), void *ptr) +{ + const struct rfc822addr *addrs=rfcp->addrs; + int naddrs=rfcp->naddrs; + + while (naddrs) + { + if (addrs->tokens == 0) + { + mailparse_rfc822tok_print(addrs->name, print_func, ptr); + ++addrs; + --naddrs; + if (addrs[-1].name && naddrs) + { + struct rfc822token *t; + + for (t=addrs[-1].name; t && t->next; t=t->next) + ; + + if (t && 
(t->token == ':' || t->token == ';'))
+					(*print_separator)(" ", ptr);
+			}
+			continue;
+		}
+		else if (addrs->name && addrs->name->token == '(')
+		{ /* old style */
+			char *p;
+
+			mailparse_rfc822tok_print(addrs->tokens, print_func, ptr);
+			(*print_func)(' ', ptr);
+
+			if (decode_func && (p=mailparse_rfc822_gettok(addrs->name))!=0)
+			{
+				char *q= (*decode_func)(p, chset);
+				char *r;
+
+				for (r=q; r && *r; r++)
+					(*print_func)( (int)(unsigned char)*r,
+							ptr);
+				if (q) efree(q);
+				efree(p);
+			}
+			else mailparse_rfc822tok_print(addrs->name, print_func, ptr);
+		}
+		else
+		{
+			int print_braces=0;
+			char *p;
+
+			if (addrs->name)
+			{
+				if (decode_func &&
+						(p=mailparse_rfc822_gettok(addrs->name)) != 0)
+				{
+					char *q= (*decode_func)(p, chset);
+					char *r;
+
+					for (r=q; r && *r; r++)
+						(*print_func)(
+								(int)(unsigned char)*r,
+								ptr);
+					if (q) efree(q);
+					efree(p);
+				}
+				else mailparse_rfc822tok_print(addrs->name,
+						print_func, ptr);
+				(*print_func)(' ', ptr);
+				print_braces=1;
+			}
+			else
+			{
+				struct rfc822token *p;
+
+				for (p=addrs->tokens; p && p->next; p=p->next)
+					if (mailparse_rfc822_is_atom(p->token) &&
+							mailparse_rfc822_is_atom(p->next->token))
+						print_braces=1;
+			}
+			if (print_braces)
+				(*print_func)('<', ptr);
+			mailparse_rfc822tok_print(addrs->tokens, print_func, ptr);
+			if (print_braces)
+			{
+				(*print_func)('>', ptr);
+			}
+		}
+		++addrs;
+		--naddrs;
+		if (naddrs)
+			if (addrs->tokens || (addrs->name &&
+					mailparse_rfc822_is_atom(addrs->name->token)))
+				(*print_separator)(", ", ptr);
+	}
+}
+
+void mailparse_rfc822t_free(struct rfc822t *p)	/* release the token array (if any) and the container */
+{
+	if (p->tokens) efree(p->tokens);
+	efree(p);
+}
+
+void mailparse_rfc822a_free(struct rfc822a *p)	/* release the address array (if any) and the container */
+{
+	if (p->addrs) efree(p->addrs);
+	efree(p);
+}
+
+void mailparse_rfc822_deladdr(struct rfc822a *rfcp, int index)	/* remove entry `index` from rfcp->addrs; out-of-range index is a no-op */
+{
+	int i;
+
+	if (index < 0 || index >= rfcp->naddrs) return;
+
+	for (i=index+1; i<rfcp->naddrs; i++)	/* shift the entries after `index` down by one slot */
+		rfcp->addrs[i-1]=rfcp->addrs[i];
+	if (--rfcp->naddrs == 0)	/* last entry removed: free the now-empty array */
+	{
+		efree(rfcp->addrs);
+		rfcp->addrs=0;
+	}
+}
+
+struct rfc822t *mailparse_rfc822t_alloc(const char *addr,
+	void (*err_func)(const char *, int))
+{
+	struct rfc822t *p=(struct rfc822t *)emalloc(sizeof(struct rfc822t));
+
+	if (!p) return (NULL);
+	memset(p, 0, sizeof(*p));
+
+	tokenize(addr, NULL, &p->ntokens, err_func);	/* first pass: count tokens only */
+	p->tokens=p->ntokens ? (struct rfc822token *)
+			ecalloc(p->ntokens, sizeof(struct rfc822token)):0;
+	if (p->ntokens && !p->tokens)
+	{
+		mailparse_rfc822t_free(p);
+		return (NULL);
+	}
+	tokenize(addr, p->tokens, &p->ntokens, NULL);	/* second pass: fill the array; errors were already reported */
+	return (p);
+}
+
+struct rfc822a *mailparse_rfc822a_alloc(struct rfc822t *t)
+{
+	struct rfc822a *p=(struct rfc822a *)emalloc(sizeof(struct rfc822a));
+
+	if (!p) return (NULL);
+	memset(p, 0, sizeof(*p));
+
+	parseaddr(t->tokens, t->ntokens, NULL, &p->naddrs);	/* first pass: count addresses only */
+	p->addrs=p->naddrs ? (struct rfc822addr *)
+			ecalloc(p->naddrs, sizeof(struct rfc822addr)):0;
+	if (p->naddrs && !p->addrs)
+	{
+		mailparse_rfc822a_free(p);
+		return (NULL);
+	}
+	parseaddr(t->tokens, t->ntokens, p->addrs, &p->naddrs);	/* second pass: fill the array */
+	return (p);
+}
+
+void mailparse_rfc822_praddr(const struct rfc822a *rfcp, int index,
+	void (*print_func)(char, void *), void *ptr)
+{
+	const struct rfc822addr *addrs;
+
+	if (index < 0 || index >= rfcp->naddrs) return;
+
+	addrs=rfcp->addrs+index;
+	if (addrs->tokens)	/* entries without address tokens print nothing */
+	{
+		mailparse_rfc822tok_print(addrs->tokens, print_func, ptr);
+		(*print_func)('\n', ptr);
+	}
+}
+
+void mailparse_rfc822_addrlist(const struct rfc822a *rfcp,
+	void (*print_func)(char, void *), void *ptr)
+{
+	int i;
+
+	for (i=0; i<rfcp->naddrs; i++)	/* emit every entry's address via mailparse_rfc822_praddr() */
+		mailparse_rfc822_praddr(rfcp, i, print_func, ptr);
+}
+
+void mailparse_rfc822_prname(const struct rfc822a *rfcp, int index,
+	void (*print_func)(char, void *), void *ptr)
+{
+	const struct rfc822addr *addrs;
+
+	if (index < 0 || index >= rfcp->naddrs) return;
+
+	addrs=rfcp->addrs+index;
+
+	if (!addrs->tokens) return;	/* skip entries that carry no address tokens */
+	mailparse_rfc822_prname_orlist(rfcp, index, print_func, ptr);
+}
+
+void mailparse_rfc822_prname_orlist(const struct rfc822a *rfcp, int 
index,
+	void (*print_func)(char, void *), void *ptr)
+{
+	const struct rfc822addr *addrs;
+
+	if (index < 0 || index >= rfcp->naddrs) return;
+
+	addrs=rfcp->addrs+index;
+
+	if (addrs->name)
+	{
+		struct rfc822token *i;
+		int n;
+		int prev_isatom=0;
+		int isatom=0;
+
+		for (i=addrs->name; i; i=i->next, prev_isatom=isatom)
+		{
+			isatom=mailparse_rfc822_is_atom(i->token);
+			if (isatom && prev_isatom)	/* separate two adjacent atoms with a space */
+				(*print_func)(' ', ptr);
+
+			if (i->token != '(')
+			{
+				print_token(i, print_func, ptr);
+				continue;
+			}
+
+			for (n=2; n<i->len; n++)	/* '(' token: emit ptr[1]..ptr[len-2], i.e. everything but the first and last character */
+				(*print_func)(i->ptr[n-1], ptr);
+		}
+	} else	/* no name recorded: print the address tokens instead */
+		mailparse_rfc822tok_print(addrs->tokens, print_func, ptr);
+	(*print_func)('\n', ptr);
+}
+
+void mailparse_rfc822_namelist(const struct rfc822a *rfcp,
+	void (*print_func)(char, void *), void *ptr)
+{
+	int i;
+
+	for (i=0; i<rfcp->naddrs; i++)	/* emit every entry's name via mailparse_rfc822_prname() */
+		mailparse_rfc822_prname(rfcp, i, print_func, ptr);
+}
diff --git a/ext/mailparse/rfc822_getaddr.c b/ext/mailparse/rfc822_getaddr.c
new file mode 100644
index 0000000000..7b9d204e7b
--- /dev/null
+++ b/ext/mailparse/rfc822_getaddr.c
@@ -0,0 +1,99 @@
+/* $Id$ */
+/*
+ ** Copyright 1998 - 1999 Double Precision, Inc.
+ ** See COPYING for distribution information. 
+ */ + +#include "php.h" +#include "php_mailparse.h" + +static void cntlen(char c, void *p) +{ + if (c != '\n') + ++ *(size_t *)p; +} + +static void saveaddr(char c, void *p) +{ + if (c != '\n') + { + char **cp=(char **)p; + + *(*cp)++=c; + } +} + +char *mailparse_rfc822_getaddr(const struct rfc822a *rfc, int n) +{ + size_t addrbuflen=0; + char *addrbuf, *ptr; + + mailparse_rfc822_praddr(rfc, n, &cntlen, &addrbuflen); + if (!(addrbuf=emalloc(addrbuflen+1))) + return (0); + + ptr=addrbuf; + mailparse_rfc822_praddr(rfc, n, &saveaddr, &ptr); + addrbuf[addrbuflen]=0; + return (addrbuf); +} + +char *mailparse_rfc822_getname(const struct rfc822a *rfc, int n) +{ + char *p, *q; + size_t addrbuflen=0; + char *addrbuf, *ptr; + + mailparse_rfc822_prname(rfc, n, &cntlen, &addrbuflen); + if (!(addrbuf=emalloc(addrbuflen+1))) + return (0); + + ptr=addrbuf; + mailparse_rfc822_prname(rfc, n, &saveaddr, &ptr); + addrbuf[addrbuflen]=0; + + /* Get rid of surrounding quotes */ + + for (p=q=addrbuf; *p; p++) + if (*p != '"') *q++=*p; + *q=0; + return (addrbuf); +} + +char *mailparse_rfc822_getname_orlist(const struct rfc822a *rfc, int n) +{ + char *p, *q; + size_t addrbuflen=0; + char *addrbuf, *ptr; + + mailparse_rfc822_prname_orlist(rfc, n, &cntlen, &addrbuflen); + if (!(addrbuf=emalloc(addrbuflen+1))) + return (0); + + ptr=addrbuf; + mailparse_rfc822_prname_orlist(rfc, n, &saveaddr, &ptr); + addrbuf[addrbuflen]=0; + + /* Get rid of surrounding quotes */ + + for (p=q=addrbuf; *p; p++) + if (*p != '"') *q++=*p; + *q=0; + return (addrbuf); +} + +char *mailparse_rfc822_gettok(const struct rfc822token *t) +{ + size_t addrbuflen=0; + char *addrbuf, *ptr; + + mailparse_rfc822tok_print(t, &cntlen, &addrbuflen); + + if (!(addrbuf=emalloc(addrbuflen+1))) + return (0); + + ptr=addrbuf; + mailparse_rfc822tok_print(t, &saveaddr, &ptr); + addrbuf[addrbuflen]=0; + return (addrbuf); +} diff --git a/ext/mailparse/rfc822_getaddrs.c b/ext/mailparse/rfc822_getaddrs.c new file mode 100644 index 
0000000000..aa498d4119 --- /dev/null +++ b/ext/mailparse/rfc822_getaddrs.c @@ -0,0 +1,92 @@ +/* $Id$ */ +/* +** Copyright 1998 - 1999 Double Precision, Inc. +** See COPYING for distribution information. +*/ + +#include "php.h" +#include "php_mailparse.h" + +static void cntlen(char c, void *p) +{ + c=c; + ++ *(size_t *)p; +} + +static void cntlensep(const char *p, void *ptr) +{ + while (*p) cntlen(*p++, ptr); +} + +static void saveaddr(char c, void *ptr) +{ + *(*(char **)ptr)++=c; +} + +static void saveaddrsep(const char *p, void *ptr) +{ + while (*p) saveaddr(*p++, ptr); +} + +char *mailparse_rfc822_getaddrs(const struct rfc822a *rfc) +{ +size_t addrbuflen=0; +char *addrbuf, *ptr; + + mailparse_rfc822_print(rfc, &cntlen, &cntlensep, &addrbuflen); + if (!(addrbuf=emalloc(addrbuflen+1))) + return (0); + + ptr=addrbuf; + mailparse_rfc822_print(rfc, &saveaddr, &saveaddrsep, &ptr); + addrbuf[addrbuflen]=0; + return (addrbuf); +} + +static void saveaddrsep_wrap(const char *p, void *ptr) +{ +int c; + + while ((c=*p++) != 0) + { + if (c == ' ') c='\n'; + saveaddr(c, ptr); + } +} + +char *mailparse_rfc822_getaddrs_wrap(const struct rfc822a *rfc, int w) +{ +size_t addrbuflen=0; +char *addrbuf, *ptr, *start, *lastnl; + + mailparse_rfc822_print(rfc, &cntlen, &cntlensep, &addrbuflen); + if (!(addrbuf=emalloc(addrbuflen+1))) + return (0); + + ptr=addrbuf; + mailparse_rfc822_print(rfc, &saveaddr, &saveaddrsep_wrap, &ptr); + addrbuf[addrbuflen]=0; + + for (lastnl=0, start=ptr=addrbuf; *ptr; ) + { + while (*ptr && *ptr != '\n') ptr++; + if (ptr-start < w) + { + if (lastnl) *lastnl=' '; + lastnl=ptr; + if (*ptr) ++ptr; + } + else + { + if (lastnl) + start=lastnl+1; + else + { + start=ptr+1; + if (*ptr) ++ptr; + } + lastnl=0; + } + } + return (addrbuf); +} diff --git a/ext/mailparse/tests/001.phpt b/ext/mailparse/tests/001.phpt new file mode 100644 index 0000000000..d9cb030cbb --- /dev/null +++ b/ext/mailparse/tests/001.phpt @@ -0,0 +1,12 @@ +--TEST-- +Check for mailparse presence 
+--SKIPIF-- + +--POST-- +--GET-- +--FILE-- + +--EXPECT-- +mailparse extension is available diff --git a/ext/mailparse/tests/002.phpt b/ext/mailparse/tests/002.phpt new file mode 100644 index 0000000000..81f7405260 --- /dev/null +++ b/ext/mailparse/tests/002.phpt @@ -0,0 +1,22 @@ +--TEST-- +Check stream encoding +--SKIPIF-- + +--POST-- +--GET-- +--FILE-- + +--EXPECT-- +hello, this is some text=3Dhello. diff --git a/ext/mailparse/try.php b/ext/mailparse/try.php new file mode 100755 index 0000000000..ac2a32f1d8 --- /dev/null +++ b/ext/mailparse/try.php @@ -0,0 +1,53 @@ +\n"; +/* print a choice of sections */ +foreach($struct as $st) { + echo "\n"; + echo "$st\n"; + /* get a handle on the message resource for a subsection */ + $section = mailparse_msg_get_part($mime, $st); + /* get content-type, encoding and header information for that section */ + $info = mailparse_msg_get_part_data($section); + echo "\n"; + echo "" . $info["content-type"] . "\n"; + echo "" . $info["content-disposition"] . "\n"; + echo "" . $info["disposition-filename"] . "\n"; + echo "" . $info["charset"] . "\n"; + echo ""; +} +echo ""; + +/* if we were called to display a part, do so now */ +if ($showpart) { + /* get a handle on the message resource for the desired part */ + $sec = mailparse_msg_get_part($mime, $showpart); + + echo "
Section $showpart
"; + ob_start(); + /* extract the part from the message file and dump it to the output buffer + * */ + mailparse_msg_extract_part_file($sec, $filename); + $contents = ob_get_contents(); + ob_end_clean(); + /* quote the message for safe display in a browser */ + echo nl2br(htmlentities($contents)) . "
";; +} +?>