HEX = '0123456789ABCDEF'
EMPTYSTRING = ''
+try:
+ from binascii import a2b_qp, b2a_qp
+except:
+ a2b_qp = None
+ b2a_qp = None
-def needsquoting(c, quotetabs):
+def needsquoting(c, quotetabs, header):
"""Decide whether a particular character needs to be quoted.
The 'quotetabs' flag indicates whether embedded tabs and spaces should be
"""
if c in ' \t':
return quotetabs
+ # if header, we have to escape _ because _ is used to escape space
+ if c == '_':
+ return header
return c == ESCAPE or not (' ' <= c <= '~')
def quote(c):
-def encode(input, output, quotetabs):
+def encode(input, output, quotetabs, header = 0):
"""Read 'input', apply quoted-printable encoding, and write to 'output'.
'input' and 'output' are files with readline() and write() methods.
The 'quotetabs' flag indicates whether embedded tabs and spaces should be
quoted. Note that line-ending tabs and spaces are always encoded, as per
RFC 1521.
+ The 'header' flag indicates whether we are encoding spaces as _ as per
+ RFC 1522.
"""
+
+ if b2a_qp is not None:
+ data = input.read()
+ odata = b2a_qp(data, quotetabs = quotetabs, header = header)
+ output.write(odata)
+ return
+
def write(s, output=output, lineEnd='\n'):
# RFC 1521 requires that the line ending in a space or tab must have
# that trailing character encoded.
stripped = '\n'
# Calculate the un-length-limited encoded line
for c in line:
- if needsquoting(c, quotetabs):
+ if needsquoting(c, quotetabs, header):
c = quote(c)
- outline.append(c)
+ if header and c == ' ':
+ outline.append('_')
+ else:
+ outline.append(c)
# First, write out the previous line
if prevline is not None:
write(prevline)
if prevline is not None:
write(prevline, lineEnd=stripped)
-def encodestring(s, quotetabs=0):
+def encodestring(s, quotetabs = 0, header = 0):
+ if b2a_qp is not None:
+ return b2a_qp(s, quotetabs = quotetabs, header = header)
from cStringIO import StringIO
infp = StringIO(s)
outfp = StringIO()
- encode(infp, outfp, quotetabs)
+ encode(infp, outfp, quotetabs, header)
return outfp.getvalue()
-def decode(input, output):
+def decode(input, output, header = 0):
"""Read 'input', apply quoted-printable decoding, and write to 'output'.
+ 'input' and 'output' are files with readline() and write() methods.
+ If 'header' is true, decode underscore as space (per RFC 1522)."""
+
+ if a2b_qp is not None:
+ data = input.read()
+ odata = a2b_qp(data, header = header)
+ output.write(odata)
+ return
- 'input' and 'output' are files with readline() and write() methods."""
new = ''
while 1:
line = input.readline()
partial = 1
while i < n:
c = line[i]
- if c != ESCAPE:
+ if c == '_' and header:
+ new = new + ' '; i = i+1
+ elif c != ESCAPE:
new = new + c; i = i+1
elif i+1 == n and not partial:
partial = 1; break
if new:
output.write(new)
-def decodestring(s):
+def decodestring(s, header = 0):
+ if a2b_qp is not None:
+ return a2b_qp(s, header = header)
from cStringIO import StringIO
infp = StringIO(s)
outfp = StringIO()
- decode(infp, outfp)
+ decode(infp, outfp, header = header)
return outfp.getvalue()
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
+**
+** Added support for quoted-printable encoding, based on RFC 1521 et al.
+** Quoted-printable encoding specifies that non-printable characters (anything
+** below 32 or above 126) be encoded as =XX, where XX is the hexadecimal value
+** of the character. It also specifies some other behavior to enable 8-bit data
+** in a mail message with little difficulty (maximum line sizes, protecting
+** some cases of whitespace, etc.).
+**
+** Brandon Long, September 2001.
*/
hexstr must contain an even number of hex digits (upper or lower case).\n\
This function is also available as \"unhexlify()\"";
+static int table_hex[128] = { /* ASCII code -> hex digit value (0-15); -1 for every non-hex character */
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, /* '0'..'9' */
+ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 'A'..'F' */
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, /* 'a'..'f' */
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
+};
+
+#define hexval(c) table_hex[(unsigned int)(c)] /* no bounds check: c must be below 128 */
+
+#define MAXLINESIZE 76 /* line-length threshold b2a_qp compares against before inserting a soft line break */
+
+static char doc_a2b_qp[] = "Decode a string of qp-encoded data"; /* docstring registered for a2b_qp in the method table */
+
+/* a2b_qp(data, header=0) -> string
+ *
+ * Decode quoted-printable 'data'.  "=XX" (two hex digits) becomes the byte
+ * XX, "=" followed by whitespace/newline is a soft line break and is
+ * dropped, "==" yields a single '=', and any other "=" is copied through
+ * literally.  When 'header' is true, '_' decodes to a space.
+ *
+ * Returns a new string object, or NULL with an exception set on argument
+ * or allocation failure.
+ */
+static PyObject*
+binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    unsigned int in, out;
+    char ch;
+    unsigned char *data, *odata;
+    unsigned int datalen = 0;
+    PyObject *rv;
+    static char *kwlist[] = {"data", "header", NULL};
+    int header = 0;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
+                                     &datalen, &header))
+        return NULL;
+
+    /* Every loop iteration consumes at least one input byte and emits at
+     * most one output byte, so datalen bytes always suffice.  Ask for at
+     * least one byte: calloc(1, 0) is allowed to return NULL, which would
+     * be misreported as out-of-memory for empty input. */
+    odata = (unsigned char *) calloc(1, datalen ? datalen : 1);
+
+    if (odata == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    in = out = 0;
+    while (in < datalen) {
+        if (data[in] == '=') {
+            in++;
+            /* A lone '=' at the very end of the input is dropped */
+            if (in >= datalen) break;
+            /* Soft line breaks */
+            if ((data[in] == '\n') || (data[in] == '\r') ||
+                (data[in] == ' ') || (data[in] == '\t')) {
+                /* Skip everything up to and including the newline */
+                if (data[in] != '\n') {
+                    while (in < datalen && data[in] != '\n') in++;
+                }
+                if (in < datalen) in++;
+            }
+            else if (data[in] == '=') {
+                /* broken case from broken python qp */
+                odata[out++] = '=';
+                in++;
+            }
+            else if ((in + 1 < datalen) &&
+                     ((data[in] >= 'A' && data[in] <= 'F') ||
+                      (data[in] >= 'a' && data[in] <= 'f') ||
+                      (data[in] >= '0' && data[in] <= '9')) &&
+                     ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
+                      (data[in+1] >= 'a' && data[in+1] <= 'f') ||
+                      (data[in+1] >= '0' && data[in+1] <= '9'))) {
+                /* Two hex digits; the in + 1 < datalen test keeps the
+                 * second-digit read inside the input buffer.  The range
+                 * checks guarantee hexval() is indexed below 128. */
+                ch = hexval(data[in]) << 4;
+                in++;
+                ch |= hexval(data[in]);
+                in++;
+                odata[out++] = ch;
+            }
+            else {
+                /* Not a valid escape: keep the '=' literally; the byte
+                 * after it is handled by the next iteration. */
+                odata[out++] = '=';
+            }
+        }
+        else if (header && data[in] == '_') {
+            odata[out++] = ' ';
+            in++;
+        }
+        else {
+            odata[out] = data[in];
+            in++;
+            out++;
+        }
+    }
+    /* rv is NULL on failure; the scratch buffer is released either way */
+    rv = PyString_FromStringAndSize((char *)odata, out);
+    free(odata);
+    return rv;
+}
+
+/* Write the two uppercase hexadecimal digits of 'ch' to s[0] (high nibble)
+ * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
+static int
+to_hex (unsigned char ch, unsigned char *s)
+{
+    static const char digits[] = "0123456789ABCDEF";
+
+    s[0] = digits[(ch >> 4) & 0x0F];
+    s[1] = digits[ch & 0x0F];
+    return 0;
+}
+
+static char doc_b2a_qp[] =
+"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
+ Encode a string using quoted-printable encoding. \n\
+\n\
+On encoding, when istext is set, newlines are not encoded, and white \n\
+space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
+both encoded. When quotetabs is set, space and tabs are encoded."; /* docstring registered for b2a_qp in the method table */
+
+/* XXX: This is ridiculously complicated to be backward compatible
+ * (mostly) with the quopri module.  It doesn't re-create the quopri
+ * module bug where text ending in CRLF has the CR encoded */
+
+/* Return nonzero when data[in] has to be written as an =XX escape.
+ * Both passes of binascii_b2a_qp() call this with the same arguments, so
+ * the size computed by the first pass matches what the second one writes. */
+static int
+qp_must_escape(const unsigned char *data, unsigned int in,
+               unsigned int datalen, unsigned int linelen,
+               int quotetabs, int istext, int header)
+{
+    unsigned char c = data[in];
+
+    if (c > 126 || c == '=')
+        return 1;
+    /* In header mode '_' stands for a space, so a literal '_' is escaped */
+    if (header && c == '_')
+        return 1;
+    if (c == '.' && linelen == 1)
+        return 1;
+    /* CR and LF are escaped only for binary (non-text) data */
+    if (c == '\r' || c == '\n')
+        return !istext;
+    /* Tab/space: escaped on request, or as the very last input byte */
+    if (c == '\t' || c == ' ')
+        return quotetabs || (in + 1 == datalen);
+    /* Remaining control characters are always escaped.  The previous
+     * test "(c != '\t') || (c != ' ')" was a tautology, which left them
+     * unescaped whenever quotetabs was 0. */
+    return c < 33;
+}
+
+/* Return nonzero when an LF or a CRLF pair starts at data[in]. */
+static int
+qp_at_line_end(const unsigned char *data, unsigned int in,
+               unsigned int datalen)
+{
+    if (data[in] == '\n')
+        return 1;
+    return (in + 1 < datalen) && (data[in] == '\r') && (data[in+1] == '\n');
+}
+
+/* b2a_qp(data, quotetabs=0, istext=1, header=0) -> string
+ *
+ * Encode 'data' as quoted-printable.  A first pass computes the exact
+ * output size, a second pass writes it.  Soft line breaks ("=" + newline)
+ * are inserted when a line would reach MAXLINESIZE.  In text mode line
+ * endings are copied through (normalised to CRLF or LF, see below) and
+ * whitespace immediately before them is escaped.  In header mode a space
+ * is written as '_'.
+ *
+ * Returns a new string object, or NULL with an exception set on argument
+ * or allocation failure.
+ */
+static PyObject*
+binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    unsigned int in, out;
+    unsigned char *data, *odata;
+    unsigned int datalen = 0, odatalen = 0;
+    PyObject *rv;
+    unsigned int linelen = 0;
+    static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
+    int istext = 1;
+    int quotetabs = 0;
+    int header = 0;
+    unsigned char ch;
+    int crlf = 0;
+    unsigned char *p;
+    unsigned int breaklen;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
+                                     &datalen, &quotetabs, &istext, &header))
+        return NULL;
+
+    /* See if this string is using CRLF line ends */
+    /* XXX: this function has the side effect of converting all of
+     * the end of lines to be the same depending on this detection
+     * here */
+    /* memchr rather than strchr: the data is length-counted and may
+     * contain NUL bytes before the first newline. */
+    p = (unsigned char *) memchr(data, '\n', datalen);
+    if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
+        crlf = 1;
+
+    /* Bytes taken by a soft line break: '=' plus LF or CRLF */
+    breaklen = crlf ? 3 : 2;
+
+    /* First, scan to see how many characters need to be encoded */
+    in = 0;
+    while (in < datalen) {
+        if (qp_must_escape(data, in, datalen, linelen,
+                           quotetabs, istext, header)) {
+            if ((linelen + 3) >= MAXLINESIZE) {
+                linelen = 0;
+                odatalen += breaklen;
+            }
+            linelen += 3;
+            odatalen += 3;
+            in++;
+        }
+        else if (istext && qp_at_line_end(data, in, datalen)) {
+            linelen = 0;
+            /* Protect against whitespace on end of line: the preceding
+             * byte grows from 1 to 3 output bytes */
+            if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
+                odatalen += 2;
+            odatalen += crlf ? 2 : 1;
+            if (data[in] == '\r')
+                in += 2;
+            else
+                in++;
+        }
+        else {
+            if ((in + 1 != datalen) &&
+                (data[in+1] != '\n') &&
+                (linelen + 1) >= MAXLINESIZE) {
+                linelen = 0;
+                odatalen += breaklen;
+            }
+            linelen++;
+            odatalen++;
+            in++;
+        }
+    }
+
+    /* Ask for at least one byte: calloc(1, 0) is allowed to return NULL,
+     * which would be misreported as out-of-memory for empty input. */
+    odata = (unsigned char *) calloc(1, odatalen ? odatalen : 1);
+
+    if (odata == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    /* Second pass: same decisions as above, now writing the output */
+    in = out = linelen = 0;
+    while (in < datalen) {
+        if (qp_must_escape(data, in, datalen, linelen,
+                           quotetabs, istext, header)) {
+            if ((linelen + 3) >= MAXLINESIZE) {
+                odata[out++] = '=';
+                if (crlf) odata[out++] = '\r';
+                odata[out++] = '\n';
+                linelen = 0;
+            }
+            odata[out++] = '=';
+            to_hex(data[in], &odata[out]);
+            out += 2;
+            in++;
+            linelen += 3;
+        }
+        else if (istext && qp_at_line_end(data, in, datalen)) {
+            linelen = 0;
+            /* Protect against whitespace on end of line: rewrite the
+             * byte just emitted as its =XX escape */
+            if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
+                ch = odata[out-1];
+                odata[out-1] = '=';
+                to_hex(ch, &odata[out]);
+                out += 2;
+            }
+
+            if (crlf) odata[out++] = '\r';
+            odata[out++] = '\n';
+            if (data[in] == '\r')
+                in += 2;
+            else
+                in++;
+        }
+        else {
+            if ((in + 1 != datalen) &&
+                (data[in+1] != '\n') &&
+                (linelen + 1) >= MAXLINESIZE) {
+                odata[out++] = '=';
+                if (crlf) odata[out++] = '\r';
+                odata[out++] = '\n';
+                linelen = 0;
+            }
+            linelen++;
+            if (header && data[in] == ' ') {
+                odata[out++] = '_';
+                in++;
+            }
+            else {
+                odata[out++] = data[in++];
+            }
+        }
+    }
+    /* rv is NULL on failure; the scratch buffer is released either way */
+    rv = PyString_FromStringAndSize((char *)odata, out);
+    free(odata);
+    return rv;
+}
/* List of functions defined in the module */
doc_rledecode_hqx},
{"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
{"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
+ {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
+ doc_a2b_qp},
+ {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
+ doc_b2a_qp},
{NULL, NULL} /* sentinel */
};