Pileus Git - ~andy/fetchmail/blobdiff - rfc822.c
Better backslash handling in RFC822 strings.
[~andy/fetchmail] / rfc822.c
index b6103c8c77659defe45f070f9f07ba318058470d..5f8e21b756cb712c23e100e54e2c41a5e4931c99 100644 (file)
--- a/rfc822.c
+++ b/rfc822.c
 #include  <stdlib.h>
 #endif
 
+#include "config.h"
 #include "fetchmail.h"
+#include "i18n.h"
 
 #define HEADER_END(p)  ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))
 
 #ifdef TESTMAIN
 static int verbose;
+char *program_name = "rfc822";
 #endif /* TESTMAIN */
 
-void reply_hack(buf, host)
+unsigned char *reply_hack(buf, host)
 /* hack message headers so replies will work properly */
-char *buf;             /* header to be hacked */
-const char *host;      /* server hostname */
+unsigned char *buf;            /* header to be hacked */
+const unsigned char *host;     /* server hostname */
 {
-    char *from, *cp;
+    unsigned char *from, *cp, last_nws = '\0', *parens_from = NULL;
     int parendepth, state, has_bare_name_part, has_host_part;
+#ifndef TESTMAIN
+    int addresscount = 1;
+#endif /* TESTMAIN */
 
-    if (strncasecmp("From: ", buf, 6)
-       && strncasecmp("To: ", buf, 4)
-       && strncasecmp("Reply-To: ", buf, 10)
-       && strncasecmp("Return-Path: ", buf, 13)
-       && strncasecmp("Cc: ", buf, 4)
-       && strncasecmp("Bcc: ", buf, 5)) {
-       return;
+    if (strncasecmp("From:", buf, 5)
+       && strncasecmp("To:", buf, 3)
+       && strncasecmp("Reply-To:", buf, 9)
+       && strncasecmp("Return-Path:", buf, 12)
+       && strncasecmp("Cc:", buf, 3)
+       && strncasecmp("Bcc:", buf, 4)
+       && strncasecmp("Resent-From:", buf, 12)
+       && strncasecmp("Resent-To:", buf, 10)
+       && strncasecmp("Resent-Cc:", buf, 10)
+       && strncasecmp("Resent-Bcc:", buf, 11)
+       && strncasecmp("Apparently-From:", buf, 16)
+       && strncasecmp("Apparently-To:", buf, 14)
+       && strncasecmp("Sender:", buf, 7)
+       && strncasecmp("Resent-Sender:", buf, 14)
+       ) {
+       return(buf);
     }
 
+#ifndef TESTMAIN
+    if (outlevel >= O_DEBUG)
+       report_build(stdout, _("About to rewrite %s"), buf);
+
+    /* make room to hack the address; buf must be malloced */
+    for (cp = buf; *cp; cp++)
+       if (*cp == ',' || isspace(*cp))
+           addresscount++;
+    buf = (unsigned char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
+#endif /* TESTMAIN */
+
+    /*
+     * This is going to foo up on some ill-formed addresses.
+     * Note that we don't rewrite the fake address <> in order to
+     * avoid screwing up bounce suppression with a null Return-Path.
+     */
+
     parendepth = state = 0;
     has_host_part = has_bare_name_part = FALSE;
     for (from = buf; *from; from++)
@@ -49,10 +81,12 @@ const char *host;   /* server hostname */
        }
 #endif /* TESTMAIN */
        if (state != 2)
+       {
            if (*from == '(')
                ++parendepth;
            else if (*from == ')')
                --parendepth;
+       }
 
        if (!parendepth && !has_host_part)
            switch (state)
@@ -63,6 +97,8 @@ const char *host;     /* server hostname */
                break;
 
            case 1:     /* we've seen the colon, we're looking for addresses */
+               if (!isspace(*from))
+                   last_nws = *from;
                if (*from == '<')
                    state = 3;
                else if (*from == '@')
@@ -70,15 +106,22 @@ const char *host;  /* server hostname */
                else if (*from == '"')
                    state = 2;
                /*
-                * Not expanding on from[-1] == ';' deals with groupnames,
+                * Not expanding on last non-WS == ';' deals with groupnames,
                 * an obscure misfeature described in sections
                 * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
                 */
-               else if ((*from == ',' || HEADER_END(from)) && has_bare_name_part && !has_host_part && from[-1] != ';')
+               else if ((*from == ',' || HEADER_END(from))
+                        && has_bare_name_part
+                        && !has_host_part
+                        && last_nws != ';')
                {
                    int hostlen;
+                   unsigned char *p;
 
-                   while (isspace(*from))
+                   p = from;
+                   if (parens_from)
+                       from = parens_from;
+                   while (isspace(*from) || (*from == ','))
                        --from;
                    from++;
                    hostlen = strlen(host);
@@ -86,47 +129,82 @@ const char *host;  /* server hostname */
                        cp[hostlen+1] = *cp;
                    *from++ = '@';
                    memcpy(from, host, hostlen);
-                   from += strlen(from);
+                   from = p + hostlen + 1;
                    has_host_part = TRUE;
                } 
+               else if (from[1] == '('
+                        && has_bare_name_part
+                        && !has_host_part
+                        && last_nws != ';' && last_nws != ')')
+               {
+                   parens_from = from;
+               } 
                else if (!isspace(*from))
                    has_bare_name_part = TRUE;
                break;
 
            case 2:     /* we're in a string */
                if (*from == '"')
-                   state = 1;
+               {
+                   char        *bp;
+                   int         bscount;
+
+                   bscount = 0;
+                   for (bp = from - 1; *bp == '\\'; bp--)
+                       bscount++;
+                   if (bscount % 2)
+                       state = 1;
+               }
                break;
 
            case 3:     /* we're in a <>-enclosed address */
                if (*from == '@')
                    has_host_part = TRUE;
-               else if (*from == '>' && !has_host_part)
+               else if (*from == '>' && from[-1] != '<')
                {
-                   int hostlen;
-
-                   hostlen = strlen(host);
-                   for (cp = from + strlen(from); cp >= from; --cp)
-                       cp[hostlen+1] = *cp;
-                   *from++ = '@';
-                   memcpy(from, host, hostlen);
-                   from += strlen(from);
-                   has_host_part = TRUE;
+                   state = 1;
+                   if (!has_host_part)
+                   {
+                       int hostlen;
+
+                       hostlen = strlen(host);
+                       for (cp = from + strlen(from); cp >= from; --cp)
+                           cp[hostlen+1] = *cp;
+                       *from++ = '@';
+                       memcpy(from, host, hostlen);
+                       from += hostlen;
+                       has_host_part = TRUE;
+                   }
                }
                break;
            }
+
+       /*
+        * If we passed a comma, reset everything.
+        */
+       if (from[-1] == ',' && !parendepth) {
+         has_host_part = has_bare_name_part = FALSE;
+         parens_from = NULL;
+       }
     }
+
+#ifndef TESTMAIN
+    if (outlevel >= O_DEBUG)
+       report_complete(stdout, _("Rewritten version is %s\n"), buf);
+#endif /* TESTMAIN */
+    return(buf);
 }
 
-char *nxtaddr(hdr)
+unsigned char *nxtaddr(hdr)
 /* parse addresses in succession out of a specified RFC822 header */
-const char *hdr;       /* header to be parsed, NUL to continue previous hdr */
+const unsigned char *hdr;      /* header to be parsed, NUL to continue previous hdr */
 {
-    static char *tp, address[POPBUFSIZE+1];
-    static const char *hp;
+    static unsigned char address[POPBUFSIZE+1];
+    static int tp;
+    static const unsigned char *hp;
     static int state, oldstate;
 #ifdef TESTMAIN
-    static const char *orighdr;
+    static const unsigned char *orighdr;
 #endif /* TESTMAIN */
     int parendepth = 0;
 
@@ -138,6 +216,8 @@ const char *hdr;    /* header to be parsed, NUL to continue previous hdr */
 #define INSIDE_BRACKETS        5       /* inside bracketed address */
 #define ENDIT_ALL      6       /* after last address */
 
+#define NEXTTP()       ((tp < sizeof(address)-1) ? tp++ : tp)
+
     if (hdr)
     {
        hp = hdr;
@@ -145,7 +225,7 @@ const char *hdr;    /* header to be parsed, NUL to continue previous hdr */
 #ifdef TESTMAIN
        orighdr = hdr;
 #endif /* TESTMAIN */
-       tp = address;
+       tp = 0;
     }
 
     for (; *hp; hp++)
@@ -163,20 +243,22 @@ const char *hdr;  /* header to be parsed, NUL to continue previous hdr */
        else if (HEADER_END(hp))
        {
            state = ENDIT_ALL;
-           if (tp > address)
+           if (tp)
            {
-               while (isspace(*--tp))
+               while (isspace(address[--tp]))
                    continue;
-               *++tp = '\0';
+               address[++tp] = '\0';
+               tp = 0;
+               return (address);
            }
-           return(tp > address ? (tp = address) : (char *)NULL);
+           return((unsigned char *)NULL);
        }
        else if (*hp == '\\')           /* handle RFC822 escaping */
        {
            if (state != INSIDE_PARENS)
            {
-               *tp++ = *hp++;                  /* take the escape */
-               *tp++ = *hp;                    /* take following char */
+               address[NEXTTP()] = *hp++;      /* take the escape */
+               address[NEXTTP()] = *hp;        /* take following unsigned char */
            }
        }
        else switch (state)
@@ -191,7 +273,7 @@ const char *hdr;    /* header to be parsed, NUL to continue previous hdr */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
-               *tp++ = *hp;
+               address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')        /* address comment -- ignore */
            {
@@ -202,7 +284,7 @@ const char *hdr;    /* header to be parsed, NUL to continue previous hdr */
            else if (*hp == '<')        /* begin <address> */
            {
                state = INSIDE_BRACKETS;
-               tp = address;
+               tp = 0;
            }
            else if (*hp != ',' && !isspace(*hp))
            {
@@ -214,11 +296,12 @@ const char *hdr;  /* header to be parsed, NUL to continue previous hdr */
        case BARE_ADDRESS:      /* collecting address without delimiters */
            if (*hp == ',')     /* end of address */
            {
-               if (tp > address)
+               if (tp)
                {
-                   *tp++ = '\0';
+                   address[NEXTTP()] = '\0';
                    state = SKIP_JUNK;
-                   return(tp = address);
+                   tp = 0;
+                   return(address);
                }
            }
            else if (*hp == '(')        /* beginning of comment */
@@ -230,18 +313,18 @@ const char *hdr;  /* header to be parsed, NUL to continue previous hdr */
            else if (*hp == '<')        /* beginning of real address */
            {
                state = INSIDE_BRACKETS;
-               tp = address;
+               tp = 0;
            }
            else if (!isspace(*hp))     /* just take it, ignoring whitespace */
-               *tp++ = *hp;
+               address[NEXTTP()] = *hp;
            break;
 
        case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
            if (*hp != '"')
-               *tp++ = *hp;
+               address[NEXTTP()] = *hp;
            else
            {
-               *tp++ = *hp;
+               address[NEXTTP()] = *hp;
                state = oldstate;
            }
            break;
@@ -258,21 +341,22 @@ const char *hdr;  /* header to be parsed, NUL to continue previous hdr */
        case INSIDE_BRACKETS:   /* possible <>-enclosed address */
            if (*hp == '>')     /* end of address */
            {
-               *tp++ = '\0';
+               address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                ++hp;
-               return(tp = address);
+               tp = 0;
+               return(address);
            }
            else if (*hp == '<')        /* nested <> */
-               tp = address;
+               tp = 0;
            else if (*hp == '"')        /* quoted address */
            {
-               *tp++ = *hp;
+               address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else                        /* just copy address */
-               *tp++ = *hp;
+               address[NEXTTP()] = *hp;
            break;
        }
     }
@@ -281,9 +365,9 @@ const char *hdr;    /* header to be parsed, NUL to continue previous hdr */
 }
 
 #ifdef TESTMAIN
-static void parsebuf(char *longbuf, int reply)
+static void parsebuf(unsigned char *longbuf, int reply)
 {
-    char       *cp;
+    unsigned char      *cp;
 
     if (reply)
     {
@@ -291,19 +375,19 @@ static void parsebuf(char *longbuf, int reply)
        printf("Rewritten buffer: %s", longbuf);
     }
     else
-       if ((cp = nxtaddr(longbuf)) != (char *)NULL)
+       if ((cp = nxtaddr(longbuf)) != (unsigned char *)NULL)
            do {
                printf("\t-> \"%s\"\n", cp);
            } while
-               ((cp = nxtaddr((char *)NULL)) != (char *)NULL);
+               ((cp = nxtaddr((unsigned char *)NULL)) != (unsigned char *)NULL);
 }
 
 
 
 main(int argc, char *argv[])
 {
-    char       buf[MSGBUFSIZE], longbuf[BUFSIZ];
-    int                ch, reply;
+    unsigned char      buf[MSGBUFSIZE], longbuf[BUFSIZ];
+    int                        ch, reply;
     
     verbose = reply = FALSE;
     while ((ch = getopt(argc, argv, "rv")) != EOF)