4 * This module contains decoding routines for converting
5 * quoted-printable data into pure 8-bit data, in MIME
8 * By Henrik Storner <storner@image.dk>
10 * Configuration file support for fetchmail 4.3.8 by
11 * Frank Damgaard <frda@post3.tele.dk>
18 #include "fetchmail.h"
/*
 * unhex - map one hexadecimal digit character to its numeric value.
 *
 * 'A'..'F' and 'a'..'f' both yield 10..15. The statement executed for
 * '0'..'9' and the fall-through for non-hex input are not on the lines
 * visible here; qp_char() rejects values above 15, so the fallback
 * presumably returns something greater than 15 - TODO confirm.
 */
20 static inline unsigned char unhex(unsigned char c)
22 if ((c >= '0') && (c <= '9'))
24 else if ((c >= 'A') && (c <= 'F'))
25 return (c - 'A' + 10);
26 else if ((c >= 'a') && (c <= 'f'))
27 return (c - 'a' + 10);
/*
 * qp_char - combine the two characters that follow '=' in quoted-printable
 * text into one byte stored through c_out.
 *
 * Both callers in this file treat a return value of 0 as success and pass
 * the input through unchanged otherwise. The range check below singles out
 * values above 15, which suggests c1 and c2 have been run through unhex()
 * by then - that conversion and the return statements are not on the
 * lines visible here (TODO confirm).
 */
32 static int qp_char(unsigned char c1, unsigned char c2, unsigned char *c_out)
37 if ((c1 > 15) || (c2 > 15))
48 * Routines to decode MIME QP-encoded headers, as per RFC 2047.
51 /* States of the UnMimeHeader() decoding state machine */
52 #define S_COPY_PLAIN 0 /* Copy text as-is while watching for MIMEHDR_INIT */
53 #define S_SKIP_MIMEINIT 1 /* Get the encoding, and skip header */
54 #define S_COPY_MIME 2 /* Decode a sequence of coded characters */
56 static const char MIMEHDR_INIT[] = "=?"; /* Start of coded sequence */
57 static const char MIMEHDR_END[] = "?="; /* End of coded sequence */
/*
 * UnMimeHeader - decode RFC 2047 encoded words in a header buffer, in place.
 *
 * hdr is scanned with strstr()/strlen() and overwritten through p_out, so
 * it has to be a writable, NUL-terminated string.
 *
 * Flow, as far as the visible lines show:
 *  - no MIMEHDR_INIT ("=?") anywhere in hdr: return early (fast path);
 *  - plain text ahead of each MIMEHDR_INIT is moved down with memmove();
 *  - S_SKIP_MIMEINIT parses "charset?encoding?" and keeps the lower-cased
 *    encoding letter in enc; invalid data falls back to S_COPY_PLAIN;
 *  - the encoded text up to MIMEHDR_END (or the end of the string) is
 *    decoded: "=XX" through qp_char() and '_' as 0x20 for
 *    quoted-printable, or from64tobits() when enc is 'b';
 *  - if only whitespace separates the word from the next MIMEHDR_INIT,
 *    that whitespace is skipped and the next word is decoded directly.
 *
 * NOTE(review): several statements of this function (case labels, braces,
 * some declarations) are not present in this listing; the summary covers
 * only the lines that are.
 */
59 void UnMimeHeader(unsigned char *hdr)
61 /* Decode a buffer containing data encoded according to RFC
62 * 2047. This only handles content-transfer-encoding; conversion
63 * between character sets is not implemented. In other words: We
64 * assume the charsets used can be displayed by your mail program
68 /* Note: Decoding is done "in-situ", i.e. without using an
69 * additional buffer for temp. storage. This is possible, since the
70 * decoded string will always be shorter than the encoded string,
71 * due to the encoding scheme.
74 int state = S_COPY_PLAIN;
75 unsigned char *p_in, *p_out, *p;
79 /* Speed up in case this is not a MIME-encoded header */
80 p = strstr(hdr, MIMEHDR_INIT);
82 return; /* No MIME header */
84 /* Loop through the buffer.
85 * p_in : Next char to be processed.
86 * p_out: Where to put the next processed char
87 * enc : Encoding used (usually, 'q' = quoted-printable)
89 for (p_out = p_in = hdr; (*p_in); ) {
92 p = strstr(p_in, MIMEHDR_INIT);
95 * No more coded data in buffer,
96 * just move remainder into place.
98 i = strlen(p_in); /* How much left */
99 memmove(p_out, p_in, i);
100 p_in += i; p_out += i;
103 /* MIME header init found at location p */
105 /* There are some uncoded chars at the beginning. */
107 memmove(p_out, p_in, i);
111 state = S_SKIP_MIMEINIT;
115 case S_SKIP_MIMEINIT:
116 /* Mime type definition: "charset?encoding?" */
117 p = strchr(p_in, '?');
119 /* p_in .. (p-1) holds the charset */
121 /* *(p+1) is the transfer encoding, *(p+2) must be a '?' */
123 enc = tolower(*(p+1));
128 state = S_COPY_PLAIN;
131 state = S_COPY_PLAIN; /* Invalid data */
135 p = strstr(p_in, MIMEHDR_END); /* Find end of coded data */
136 if (p == NULL) p = p_in + strlen(p_in);
137 for (; (p_in < p); ) {
138 /* Decode all encoded data */
141 /* Decode one char qp-coded at (p_in+1) and (p_in+2) */
/* NOTE(review): *(p_in+2) is read before qp_char() validates anything;
 * if p is the terminating NUL and p_in == p-1, that byte lies beyond the
 * NUL - confirm the caller's buffer makes this read safe. */
142 if (qp_char(*(p_in+1), *(p_in+2), p_out) == 0)
145 /* Invalid QP data - pass through unchanged. */
150 else if (*p_in == '_') {
152 * RFC 2047: '_' inside encoded word represents 0x20.
153 * NOT a space - always the value 0x20.
165 else if (enc == 'b') {
166 /* Decode base64 encoded data */
/* The byte at p is saved in delimsave and overwritten with '\r' before
 * from64tobits() runs - presumably to stop the base64 decoder at the end
 * of the encoded text; the restore of *p is not on a visible line. */
170 delimsave = *p; *p = '\r';
171 decoded_count = from64tobits(p_out, p_in);
173 if (decoded_count > 0)
174 p_out += decoded_count;
185 p_in += 2; /* Skip the MIMEHDR_END delimiter */
188 * We've completed decoding one encoded sequence. But another
189 * may follow immediately, in which case whitespace before the
190 * new MIMEHDR_INIT delimiter must be discarded.
191 * See if that is the case
193 p = strstr(p_in, MIMEHDR_INIT);
194 state = S_COPY_PLAIN;
197 * There is more MIME data later on. Is there
198 * whitespace only before the delimiter?
203 for (q=p_in; (wsp_only && (q < p)); q++)
204 wsp_only = isspace(*q);
208 * Whitespace-only before the MIME delimiter. OK,
209 * just advance p_in to past the new MIMEHDR_INIT,
210 * and prepare to process the new MIME charset/encoding
213 p_in = p + strlen(MIMEHDR_INIT);
214 state = S_SKIP_MIMEINIT;
227 * Routines for decoding body-parts of a message.
229 * Since the "fetch" part of fetchmail gets a message body
230 * one line at a time, we need to maintain some state variables
231 * across multiple invocations of the UnMimeBodyline() routine.
232 * The driver routine should call MimeBodyType() when all
233 * headers have been received, and then UnMimeBodyline() for
234 * every line in the message body.
/* Values stored in BodyState. S_BODY_HDR, which UnMimeBodyline() assigns,
 * is not defined on any line visible in this listing. */
237 #define S_BODY_DATA 0
241 * Flag indicating if we are currently processing
242 * the headers or the body of a (multipart) message.
244 static int BodyState = S_BODY_DATA;
247 * Flag indicating if we are in the process of decoding
248 * a quoted-printable body part.
250 static int CurrEncodingIsQP = 0;
253 * Delimiter for multipart messages. RFC 2046 states that this must
254 * NEVER be longer than 70 characters. Add 3 for the two hyphens
255 * at the beginning, and a terminating null.
257 #define MAX_DELIM_LEN 70
/* Holds "--" followed by the boundary string once MimeBodyType() has found
 * one; an empty string (first byte '\0') means "no multipart delimiter". */
258 static unsigned char MultipartDelimiter[MAX_DELIM_LEN+3];
261 /* This string replaces the "Content-Transfer-Encoding: quoted-printable"
262 * string in all headers, including those in body-parts. It must be
263 * no longer than the original string.
 *
 * SetEncoding8bit() does the replacement in place: it copies ENC8BIT
 * (without its terminating NUL) over the start of the header that
 * XferEncOfs points to and then blanks what is left of that header.
 * A NULL XferEncOfs is accepted and skips the rewrite.
265 static const char ENC8BIT[] = "Content-Transfer-Encoding: 8bit";
266 static void SetEncoding8bit(unsigned char *XferEncOfs)
270 if (XferEncOfs != NULL) {
271 memcpy(XferEncOfs, ENC8BIT, strlen(ENC8BIT));
273 /* If anything left, in this header, replace with whitespace */
/* The loop stops at the first byte below ' ' (for instance CR, LF or NUL),
 * so only the remainder of this header line is overwritten. */
274 for (p=XferEncOfs+strlen(ENC8BIT); (*p >= ' '); p++) *p=' ';
280 * This routine does three things:
281 * 1) It determines - based on the message headers - whether the
282 * message body is a MIME message that may hold 8 bit data.
283 * - A message that has a "quoted-printable" or "8bit" transfer
284 * encoding is assumed to contain 8-bit data (when decoded).
285 * - A multipart message is assumed to contain 8-bit data
286 * when decoded (there might be quoted-printable body-parts).
287 * - All other messages are assumed NOT to include 8-bit data.
288 * 2) It determines the delimiter-string used in multi-part message
290 * 3) It sets the initial values of the CurrEncodingIsQP and BodyState
291 * variables, from the header contents.
293 * The return value is a bitmask.
/*
 * MimeBodyType - classify a message from its header block.
 *
 * hdrs is walked one '\n'-terminated line at a time until the
 * Content-Transfer-Encoding, Content-Type and MIME-Version headers have
 * all been seen (HdrsFound equals 3) or the text ends. Their values are
 * copied into xmalloc()ed strings that are freed at the end.
 *
 * Side effects on file-scope state: MultipartDelimiter is emptied and
 * CurrEncodingIsQP / BodyState are reset on entry; for a "multipart/" or
 * "message/" Content-Type the delimiter becomes "--" plus the boundary
 * value, and for a quoted-printable single-part message CurrEncodingIsQP
 * is set and the header text is rewritten through SetEncoding8bit().
 *
 * BodyType (the return value, per its declaration) is built from
 * MSG_IS_7BIT, MSG_IS_8BIT and MSG_NEEDS_DECODE. Nothing is classified
 * unless the MIME-Version value compares equal to "1.0".
 *
 * NOTE(review): the initialisation of BodyType, the extraction of the
 * header values into p, and the return statement are not on the lines
 * visible in this listing.
 */
295 int MimeBodyType(unsigned char *hdrs)
297 unsigned char *NxtHdr = hdrs;
298 unsigned char *XferEnc, *XferEncOfs, *CntType, *MimeVer, *p;
299 int HdrsFound = 0; /* We only look for three headers */
300 int BodyType; /* Return value */
302 /* Setup for a standard (no MIME, no QP, 7-bit US-ASCII) message */
303 MultipartDelimiter[0] = '\0';
304 CurrEncodingIsQP = 0;
305 BodyState = S_BODY_DATA;
308 /* Just in case ... */
312 XferEnc = XferEncOfs = CntType = MimeVer = NULL;
315 if (strncasecmp("Content-Transfer-Encoding:", NxtHdr, 26) == 0) {
319 XferEnc = (char *)xmalloc(strlen(p) + 1);
324 else if (strncasecmp("Content-Type:", NxtHdr, 13) == 0) {
326 * This one is difficult. We cannot use the standard
327 * nxtaddr() routine, since the boundary-delimiter is
328 * (probably) enclosed in quotes - and thus appears
329 * as an rfc822 comment, and nxtaddr() "eats" up any
330 * spaces in the delimiter. So, we have to do this
334 /* Skip the "Content-Type:" part and whitespace after it */
335 for (NxtHdr += 13; ((*NxtHdr == ' ') || (*NxtHdr == '\t')); NxtHdr++);
338 * Get the full value of the Content-Type header;
339 * it might span multiple lines. So search for
340 * a newline char, but ignore those that have a
341 * have a TAB or space just after the NL (continued
346 p=strchr((p+1),'\n');
347 } while ( (p != NULL) && ((*(p+1) == '\t') || (*(p+1) == ' ')) );
348 if (p == NULL) p = NxtHdr + strlen(NxtHdr);
350 CntType = (char *)xmalloc(p-NxtHdr+2);
351 strncpy(CntType, NxtHdr, (p-NxtHdr));
352 *(CntType+(p-NxtHdr)) = '\0';
355 else if (strncasecmp("MIME-Version:", NxtHdr, 13) == 0) {
358 MimeVer = (char *)xmalloc(strlen(p) + 1);
364 NxtHdr = (strchr(NxtHdr, '\n'));
365 if (NxtHdr != NULL) NxtHdr++;
366 } while ((NxtHdr != NULL) && (*NxtHdr) && (HdrsFound != 3));
369 /* Done looking through the headers, now check what they say */
370 if ((MimeVer != NULL) && (strcmp(MimeVer, "1.0") == 0)) {
372 /* Check Content-Type to see if this is a multipart message */
373 if (CntType != NULL) {
374 if ((strncasecmp(CntType, "multipart/", 10) == 0) ||
375 (strncasecmp(CntType, "message/", 8) == 0)) {
379 /* Search for "boundary=" */
/* NOTE(review): strchr() finds the FIRST '=' in the Content-Type value,
 * not specifically "boundary="; a parameter placed ahead of boundary
 * (for example charset=...) would be picked up instead - confirm. */
380 p1 = strchr(CntType, '=');
382 /* Skip the '=' and any whitespace after it */
383 for (p1++; (isspace(*p1)); p1++);
385 /* The delimiter might be inside quotes */
388 p2 = strchr(p1, '\"');
393 if (strlen(p1) > 0) {
394 /* The actual delimiter is "--" followed by
395 the boundary string */
396 strcpy(MultipartDelimiter, "--");
397 strncat(MultipartDelimiter, p1, MAX_DELIM_LEN);
398 BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
405 * Check Content-Transfer-Encoding, but
406 * ONLY for non-multipart messages (BodyType == 0).
408 if ((XferEnc != NULL) && (BodyType == 0)) {
409 if (strcasecmp(XferEnc, "quoted-printable") == 0) {
410 CurrEncodingIsQP = 1;
411 BodyType = (MSG_IS_8BIT | MSG_NEEDS_DECODE);
412 SetEncoding8bit(XferEncOfs);
414 else if (strcasecmp(XferEnc, "7bit") == 0) {
415 CurrEncodingIsQP = 0;
416 BodyType = (MSG_IS_7BIT);
418 else if (strcasecmp(XferEnc, "8bit") == 0) {
419 CurrEncodingIsQP = 0;
420 BodyType = (MSG_IS_8BIT);
426 if (MimeVer) free(MimeVer);
427 if (XferEnc) free(XferEnc);
428 if (CntType) free(CntType);
435 * Decode one line of data containing QP data.
436 * Return flag set if this line ends with a soft line-break.
437 * 'bufp' is modified to point to the end of the output buffer.
/*
 * DoOneQPLine - decode one quoted-printable line in place.
 *
 * The line starts at *bufp and is scanned as a NUL-terminated string.
 * Text between '=' characters is moved down with memmove(). An '='
 * followed by NUL, CR or LF is a soft line break and is skipped together
 * with the line ending; any other '=' is handed to qp_char() with the two
 * characters after it, and a sequence qp_char() rejects is passed through
 * unchanged.
 *
 * When collapsedoubledot is non-zero and the line begins with "..", a
 * special case applies; the statement it guards is not on a visible line
 * (presumably one of the dots is dropped - TODO confirm).
 */
439 static int DoOneQPLine(unsigned char **bufp, int collapsedoubledot)
441 unsigned char *buf = *bufp;
442 unsigned char *p_in, *p_out, *p;
447 if (collapsedoubledot && (strncmp(buf, "..", 2) == 0))
450 for (p_out = buf; (*p_in); ) {
451 p = strchr(p_in, '=');
453 /* No more QP data, just move remainder into place */
455 memmove(p_out, p_in, n);
456 p_in += n; p_out += n;
460 /* There are some uncoded chars at the beginning. */
462 memmove(p_out, p_in, n);
467 case '\0': case '\r': case '\n':
468 /* Soft line break, skip '=' */
470 if (*p_in == '\r') p_in++;
471 if (*p_in == '\n') p_in++;
476 /* There is a QP encoded byte */
477 if (qp_char(*(p+1), *(p+2), p_out) == 0) {
481 /* Invalid QP data - pass through unchanged. */
497 /* This is called once per line in the message body. We need to scan
498 * all lines in the message body for the multipart delimiter string,
499 * and handle any body-part headers in such messages (these can toggle
500 * qp-decoding on and off).
502 * Note: Messages that are NOT multipart-messages go through this
503 * routine quickly, since BodyState will always be S_BODY_DATA,
504 * and MultipartDelimiter is empty (its first byte is NUL).
506 * Return flag set if this line ends with a soft line-break.
507 * 'bufp' is modified to point to the end of the output buffer.
 *
 * collapsedoubledot is handed on unchanged to DoOneQPLine(), which is
 * only called while CurrEncodingIsQP is set; otherwise the line is left
 * as it is and *bufp is simply moved to its end.
510 int UnMimeBodyline(unsigned char **bufp, int collapsedoubledot)
512 unsigned char *buf = *bufp;
517 UnMimeHeader(buf); /* Headers in body-parts can be encoded, too! */
518 if (strncasecmp("Content-Transfer-Encoding:", buf, 26) == 0) {
521 XferEnc = nxtaddr(buf);
522 if ((XferEnc != NULL) && (strcasecmp(XferEnc, "quoted-printable") == 0)) {
523 CurrEncodingIsQP = 1;
/* Rewrite this part header in place so that it reads "8bit". */
524 SetEncoding8bit(buf);
/* An empty line (bare NUL, LF or CRLF) ends the body-part headers. */
527 else if ((*buf == '\0') || (*buf == '\n') || (strcmp(buf, "\r\n") == 0))
528 BodyState = S_BODY_DATA;
530 *bufp = (buf + strlen(buf));
/* A line that starts with the delimiter opens a new body part: go back
 * to header parsing and switch QP decoding off until a part header
 * turns it on again. */
534 if ((*MultipartDelimiter) &&
535 (strncmp(buf, MultipartDelimiter, strlen(MultipartDelimiter)) == 0)) {
536 BodyState = S_BODY_HDR;
537 CurrEncodingIsQP = 0;
540 if (CurrEncodingIsQP)
541 ret = DoOneQPLine(bufp, collapsedoubledot);
543 *bufp = (buf + strlen(buf));
/* Definitions used by the stand-alone main() driver. */
555 char *program_name = "unmime";
/* Initial size of main()'s input buffer and the step by which it grows. */
557 #define BUFSIZE_INCREMENT 4096
/* DBG_FWRITE has two alternative definitions: a plain fwrite() call, or
 * nothing at all. The preprocessor conditional that selects one of them
 * is not on a line visible in this listing. */
560 #define DBG_FWRITE(B,L,BS,FD) fwrite(B, L, BS, FD)
562 #define DBG_FWRITE(B,L,BS,FD)
/*
 * main - stand-alone filter around the decoder: message on stdin,
 * decoded message on stdout.
 *
 * Phase 1 reads the headers one byte at a time into a buffer that grows
 * by BUFSIZE_INCREMENT whenever it fills up, until nl_count reaches 2 or
 * input ends (the code that maintains nl_count is not on a visible line).
 * The headers then go through UnMimeHeader() and MimeBodyType() and are
 * written out.
 *
 * Phase 2 reads the body line by line (up to and including '\n'), runs
 * each line through UnMimeBodyline() when bodytype has MSG_NEEDS_DECODE
 * set, and writes it out; the loop ends on the first empty read.
 *
 * DBG_FWRITE additionally mirrors input and output to the two files
 * opened below when it expands to fwrite().
 */
565 int main(int argc, char *argv[])
567 unsigned int BufSize;
568 unsigned char *buffer, *buf_p;
569 int nl_count, i, bodytype;
573 FILE *fd_orig, *fd_conv;
/* NOTE(review): fixed, predictable file names under /tmp and unchecked
 * fopen() results - acceptable only for a debugging aid; confirm these
 * lines are compiled in debug builds only. */
577 sprintf(fnam, "/tmp/i_unmime.%x", pid);
578 fd_orig = fopen(fnam, "w");
579 sprintf(fnam, "/tmp/o_unmime.%x", pid);
580 fd_conv = fopen(fnam, "w");
583 BufSize = BUFSIZE_INCREMENT; /* Initial size of buffer */
584 buf_p = buffer = (unsigned char *) xmalloc(BufSize);
588 i = fread(buf_p, 1, 1, stdin);
603 if ((buf_p - buffer) == BufSize) {
604 /* Buffer is full! Get more room. */
605 buffer = xrealloc(buffer, BufSize+BUFSIZE_INCREMENT);
606 buf_p = buffer + BufSize;
607 BufSize += BUFSIZE_INCREMENT;
609 } while ((i > 0) && (nl_count < 2));
612 DBG_FWRITE(buffer, strlen(buffer), 1, fd_orig);
614 UnMimeHeader(buffer);
615 bodytype = MimeBodyType(buffer);
618 fwrite(buffer, i, 1, stdout);
619 DBG_FWRITE(buffer, i, 1, fd_conv);
/* NOTE(review): buffer - 1 points before the start of the allocation,
 * which C leaves undefined even if it is never dereferenced; presumably
 * a line not shown here advances buf_p before each fread(). No bound
 * check against BufSize is visible in this second read loop - confirm. */
622 buf_p = (buffer - 1);
625 i = fread(buf_p, 1, 1, stdin);
626 } while ((i == 1) && (*buf_p != '\n'));
/* NOTE(review): 'buf' is not declared on any visible line; this only
 * matters when DBG_FWRITE expands to fwrite(). */
629 DBG_FWRITE(buf, (buf_p - buffer), 1, fd_orig);
631 if (buf_p > buffer) {
632 if (bodytype & MSG_NEEDS_DECODE) {
634 UnMimeBodyline(&buf_p, 0);
636 fwrite(buffer, (buf_p - buffer), 1, stdout);
637 DBG_FWRITE(buffer, (buf_p - buffer), 1, fd_conv);
639 } while (buf_p > buffer);