]> Pileus Git - ~andy/fetchmail/blobdiff - rfc822.c
Minor bug fixes for socket.c
[~andy/fetchmail] / rfc822.c
index 0471a298623b0e80df1adb685bfe8cfe21f41db1..15b88f05ef476312af280de1d2ec7183ed4768db 100644 (file)
--- a/rfc822.c
+++ b/rfc822.c
-/*
- * rfc822.c -- code for slicing and dicing RFC822 mail headers
- *
- * Copyright 1996 by Eric S. Raymond
- * All rights reserved.
- * For license terms, see the file COPYING in this directory.
- */
+/*****************************************************************************
+
+NAME:
+   rfc822.c -- code for slicing and dicing RFC822 mail headers
+
+ENTRY POINTS:
+   nxtaddr() -- parse the next address out of an RFC822 header
+   reply_hack() -- append hostname to local header addresses 
+
+THEORY:
+   How to parse RFC822 headers in C. This is not a fully conformant
+implementation of RFC822 or RFC2822, but it has been in production use
+in a widely-deployed MTA (fetchmail) since 1996 without complaints.
+Really perverse combinations of quoting and commenting could break it.
+
+AUTHOR:
+   Eric S. Raymond <esr@thyrsus.com>, 1997.  This source code example
+is part of fetchmail and the Unix Cookbook, and is released under the
+MIT license.  Compile with -DMAIN to build the demonstrator.
+
+******************************************************************************/
+
+#include "config.h"
 
 #include  <stdio.h>
 #include  <ctype.h>
 #include  <string.h>
-#if defined(STDC_HEADERS)
+#include  <strings.h>
 #include  <stdlib.h>
+
+#include "fetchmail.h"
+#include "sdump.h"
+
+#ifndef MAIN
+#include "i18n.h"
+#else
+#include  <unistd.h>
+static int verbose;
+const char *program_name = "rfc822";
+#endif /* MAIN */
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
 #endif
 
-#include  "fetchmail.h"
+#define HEADER_END(p)  ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))
+
+#define BEFORE_EOL(s)  (strcspn((s), "\r\n"))
 
-void reply_hack(buf, host)
+char *reply_hack(
+       char *buf               /* header to be hacked */,
+       const char *host        /* server hostname */,
+       size_t *length)
 /* hack message headers so replies will work properly */
-char *buf;             /* header to be hacked */
-const char *host;      /* server hostname */
 {
-    const char *from;
-    int parendepth, state = 0, tokencount = 0;
-    char mycopy[POPBUFSIZE+1];
-
-    if (strncmp("From: ", buf, 6)
-       && strncmp("To: ", buf, 4)
-       && strncmp("Reply-", buf, 6)
-       && strncmp("Cc: ", buf, 4)
-       && strncmp("Bcc: ", buf, 5)) {
-       return;
+    char *from, *cp, last_nws = '\0', *parens_from = NULL;
+    int parendepth, state, has_bare_name_part, has_host_part;
+#ifndef MAIN
+    int addresscount = 1;
+#endif /* MAIN */
+
+    if (strncasecmp("From:", buf, 5)
+       && strncasecmp("To:", buf, 3)
+       && strncasecmp("Reply-To:", buf, 9)
+       && strncasecmp("Return-Path:", buf, 12)
+       && strncasecmp("Cc:", buf, 3)
+       && strncasecmp("Bcc:", buf, 4)
+       && strncasecmp("Resent-From:", buf, 12)
+       && strncasecmp("Resent-To:", buf, 10)
+       && strncasecmp("Resent-Cc:", buf, 10)
+       && strncasecmp("Resent-Bcc:", buf, 11)
+       && strncasecmp("Apparently-From:", buf, 16)
+       && strncasecmp("Apparently-To:", buf, 14)
+       && strncasecmp("Sender:", buf, 7)
+       && strncasecmp("Resent-Sender:", buf, 14)
+       ) {
+       return(buf);
     }
 
-    strcpy(mycopy, buf);
-    strcat(mycopy, ",");
-    for (from = mycopy; *from; from++)
-    {
-#ifdef FOO
-       printf("state %d: %s", state, mycopy);
-       printf("%*s^\n", from - mycopy + 10, " ");
-#endif /* TESTMAIN */
-
-#define INSERT_HOSTNAME        \
-               strcpy(buf, "@"); \
-               strcat(buf, host); \
-               buf += strlen(buf); \
-               state = 7;
-
-       switch (state)
-       {
-       case 0:   /* before header colon */
-           if (*from == ':')
-               state = 1;
-           break;
-
-       case 1:   /* we've seen the colon, we're looking for addresses */
-           if (*from == '"')
-               state = 3;
-           else if (*from == '(')
-           {
-               parendepth = 1;
-               state = 4;    
-           }
-           else if (*from == '<')
-               state = 5;
-           else if (isalnum(*from))
-               state = 6;
-           else if (isspace(*from))
-               state = 2;
-           break;
+#ifndef MAIN
+    if (outlevel >= O_DEBUG) {
+       report_build(stdout, GT_("About to rewrite %s...\n"), (cp = sdump(buf, BEFORE_EOL(buf))));
+       xfree(cp);
+    }
 
-       case 2:     /* found a token boundary -- reset without copying */
-           if (!isspace(*from))
-           {
-               tokencount++;
-               state = 1;
-               --from;
-               continue;
-           }
-           break;
+    /* make room to hack the address; buf must be malloced */
+    for (cp = buf; *cp; cp++)
+       if (*cp == ',' || isspace((unsigned char)*cp))
+           addresscount++;
+    buf = (char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1);
+#endif /* MAIN */
 
-       case 3:   /* we're in a quoted human name, copy and ignore */
-           if (*from == '"')
-               state = 1;
-           break;
+    /*
+     * This is going to foo up on some ill-formed addresses.
+     * Note that we don't rewrite the fake address <> in order to
+     * avoid screwing up bounce suppression with a null Return-Path.
+     */
 
-       case 4:   /* we're in a parenthesized human name, copy and ignore */
+    parendepth = state = 0;
+    has_host_part = has_bare_name_part = FALSE;
+    for (from = buf; *from; from++)
+    {
+#ifdef MAIN
+       if (verbose)
+       {
+           printf("state %d: %s", state, buf);
+           printf("%*s^\n", (int)(from - buf + 10), " ");
+       }
+#endif /* MAIN */
+       if (state != 2)
+       {
            if (*from == '(')
                ++parendepth;
            else if (*from == ')')
                --parendepth;
-           if (parendepth == 0)
-               state = 1;
-           break;
-
-       case 5:   /* we're in a <>-enclosed address */
-           if (*from == '@')
-               state = 7;
-           else if (*from == '>')
-           {
-               INSERT_HOSTNAME
-           }
-           break;
-
-       case 6:   /* not string or comment, could be a bare address */
-           if (*from == '@')
-               state = 7;
-
-           else if (*from == '<')
-               state = 5;
-
-           /* on proper termination with no @, insert hostname */
-           else if (*from == ',')
-           {
-               INSERT_HOSTNAME
-               tokencount = 0;
-           }
+       }
 
-           /* If the address token is not properly terminated, ignore it. */
-           else if (isspace(*from))
+       if (!parendepth && !has_host_part)
+           switch (state)
            {
-               const char *cp;
-
+           case 0:     /* before header colon */
+               if (*from == ':')
+                   state = 1;
+               break;
+
+           case 1:     /* we've seen the colon, we're looking for addresses */
+               if (!isspace((unsigned char)*from))
+                   last_nws = *from;
+               if (*from == '<')
+                   state = 3;
+               else if (*from == '@' || *from == '!')
+                   has_host_part = TRUE;
+               else if (*from == '"')
+                   state = 2;
                /*
-                * The only lookahead case.  If we're looking at space or tab,
-                * we might be looking at a local name immediately followed
-                * by a human name.
+                * Not expanding on last non-WS == ';' deals with groupnames,
+                * an obscure misfeature described in sections
+                * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
                 */
-               for (cp = from; isspace(*cp); cp++)
-                   continue;
-               if (*cp == '(')
+               else if ((*from == ',' || HEADER_END(from))
+                        && has_bare_name_part
+                        && !has_host_part
+                        && last_nws != ';')
                {
-                   INSERT_HOSTNAME
+                   int hostlen;
+                   char *p;
+
+                   p = from;
+                   if (parens_from)
+                       from = parens_from;
+                   while (isspace((unsigned char)*from) || (*from == ','))
+                       --from;
+                   from++;
+                   hostlen = strlen(host);
+                   for (cp = from + strlen(from); cp >= from; --cp)
+                       cp[hostlen+1] = *cp;
+                   *from++ = '@';
+                   memcpy(from, host, hostlen);
+                   from = p + hostlen + 1;
+                   has_host_part = TRUE;
+               } 
+               else if (from[1] == '('
+                        && has_bare_name_part
+                        && !has_host_part
+                        && last_nws != ';' && last_nws != ')')
+               {
+                   parens_from = from;
+               } 
+               else if (!isspace((unsigned char)*from))
+                   has_bare_name_part = TRUE;
+               break;
+
+           case 2:     /* we're in a string */
+               if (*from == '"')
+               {
+                   char        *bp;
+                   int         bscount;
+
+                   bscount = 0;
+                   for (bp = from - 1; *bp == '\\'; bp--)
+                       bscount++;
+                   if (!(bscount % 2))
+                       state = 1;
                }
-           }
+               break;
 
-           /* everything else, including alphanumerics, just passes through */
-           break;
-
-       case 7:   /* we're done with this address, skip to end */
-           if (*from == ',')
-           {
-               state = 1;
-               tokencount == 0;
+           case 3:     /* we're in a <>-enclosed address */
+               if (*from == '@' || *from == '!')
+                   has_host_part = TRUE;
+               else if (*from == '>' && (from > buf && from[-1] != '<'))
+               {
+                   state = 1;
+                   if (!has_host_part)
+                   {
+                       int hostlen;
+
+                       hostlen = strlen(host);
+                       for (cp = from + strlen(from); cp >= from; --cp)
+                           cp[hostlen+1] = *cp;
+                       *from++ = '@';
+                       memcpy(from, host, hostlen);
+                       from += hostlen;
+                       has_host_part = TRUE;
+                   }
+               }
+               break;
            }
-           break;
+
+       /*
+        * If we passed a comma, reset everything.
+        */
+       if ((from > buf && from[-1] == ',') && !parendepth) {
+         has_host_part = has_bare_name_part = FALSE;
+         parens_from = NULL;
        }
+    }
 
-       /* all characters from the old buffer get copied to the new one */
-       *buf++ = *from;
+#ifndef MAIN
+    if (outlevel >= O_DEBUG) {
+       report_complete(stdout, GT_("...rewritten version is %s.\n"),
+                       (cp = sdump(buf, BEFORE_EOL(buf))));
+       xfree(cp);
     }
-#undef INSERT_HOSTNAME
 
-    /* back up and nuke the appended comma sentinel */
-    *--buf = '\0';
+#endif /* MAIN */
+    *length = strlen(buf);
+    return(buf);
 }
 
-char *nxtaddr(hdr)
+char *nxtaddr(const char *hdr /* header to be parsed, NUL to continue previous hdr */)
 /* parse addresses in succession out of a specified RFC822 header */
-const char *hdr;       /* header to be parsed, NUL to continue previous hdr */
 {
-    static char *tp, address[POPBUFSIZE+1];
+    static char address[BUFSIZ];
+    static size_t tp;
     static const char *hp;
     static int state, oldstate;
-    int parendepth;
+#ifdef MAIN
+    static const char *orighdr;
+#endif /* MAIN */
+    int parendepth = 0;
 
-    /*
-     * Note: it is important that this routine not stop on \r, since
-     * we use \r as a marker for RFC822 continuations elsewhere.
-     */
 #define START_HDR      0       /* before header colon */
 #define SKIP_JUNK      1       /* skip whitespace, \n, and junk */
 #define BARE_ADDRESS   2       /* collecting address without delimiters */
@@ -177,57 +248,79 @@ const char *hdr;  /* header to be parsed, NUL to continue previous hdr */
 #define INSIDE_BRACKETS        5       /* inside bracketed address */
 #define ENDIT_ALL      6       /* after last address */
 
+#define NEXTTP()       ((tp < sizeof(address)-1) ? tp++ : tp)
+
     if (hdr)
     {
        hp = hdr;
        state = START_HDR;
+#ifdef MAIN
+       orighdr = hdr;
+#endif /* MAIN */
+       tp = 0;
     }
 
+    if (!hp) return NULL;
+
     for (; *hp; hp++)
     {
-       switch (state)
+#ifdef MAIN
+       if (verbose)
        {
-       case START_HDR:   /* before header colon */
-           if (*hp == '\n')
+           printf("state %d: %s", state, orighdr);
+           printf("%*s^\n", (int)(hp - orighdr + 10), " ");
+       }
+#endif /* MAIN */
+
+       if (state == ENDIT_ALL)         /* after last address */
+           return(NULL);
+       else if (HEADER_END(hp))
+       {
+           state = ENDIT_ALL;
+           if (tp)
            {
-               state = ENDIT_ALL;
-               return(NULL);
+               while (tp > 0 && isspace((unsigned char)address[tp - 1]))
+                   tp--;
+               address[tp] = '\0';
+               tp = 0;
+               return (address);
            }
-           else if (*hp == ':')
+           return(NULL);
+       }
+       else if (*hp == '\\')           /* handle RFC822 escaping */
+       {
+           if (state != INSIDE_PARENS)
            {
-               state = SKIP_JUNK;
-               tp = address;
+               address[NEXTTP()] = *hp++;      /* take the escape */
+               address[NEXTTP()] = *hp;        /* take following unsigned char */
            }
+       }
+       else switch (state)
+       {
+       case START_HDR:   /* before header colon */
+           if (*hp == ':')
+               state = SKIP_JUNK;
            break;
 
        case SKIP_JUNK:         /* looking for address start */
-           if (*hp == '\n')            /* no more addresses */
-           {
-               state = ENDIT_ALL;
-               return(NULL);
-           }
-           else if (*hp == '\\')       /* handle RFC822 escaping */
-           {
-               *tp++ = *hp++;                  /* take the escape */
-               *tp++ = *hp;                    /* take following char */
-           }
-           else if (*hp == '"')        /* quoted string */
+           if (*hp == '"')     /* quoted string */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
-               *tp++ = *hp;
+               address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')        /* address comment -- ignore */
            {
                parendepth = 1;
+               oldstate = SKIP_JUNK;
                state = INSIDE_PARENS;    
            }
            else if (*hp == '<')        /* begin <address> */
            {
                state = INSIDE_BRACKETS;
-               tp = address;
+               tp = 0;
            }
-           else if (!isspace(*hp))     /* ignore space */
+           else if (*hp != ',' && !isspace((unsigned char)*hp))
            {
                --hp;
                state = BARE_ADDRESS;
@@ -235,101 +328,71 @@ const char *hdr; /* header to be parsed, NUL to continue previous hdr */
            break;
 
        case BARE_ADDRESS:      /* collecting address without delimiters */
-           if (*hp == '\n')            /* end of bare address */
+           if (*hp == ',')     /* end of address */
            {
-               if (tp > address)
+               if (tp)
                {
-                   *tp++ = '\0';
-                   state = ENDIT_ALL;
-                   return(tp = address);
+                   address[NEXTTP()] = '\0';
+                   state = SKIP_JUNK;
+                   tp = 0;
+                   return(address);
                }
            }
-           else if (*hp == '\\')       /* handle RFC822 escaping */
-           {
-               *tp++ = *hp++;                  /* take the escape */
-               *tp++ = *hp;                    /* take following char */
-           }
-           else if (*hp == ',')        /* end of address */
+           else if (*hp == '(')        /* beginning of comment */
            {
-               if (tp > address)
-               {
-                   *tp++ = '\0';
-                   state = SKIP_JUNK;
-                   return(tp = address);
-               }
+               parendepth = 1;
+               oldstate = BARE_ADDRESS;
+               state = INSIDE_PARENS;    
            }
            else if (*hp == '<')        /* beginning of real address */
            {
                state = INSIDE_BRACKETS;
-               tp = address;
+               tp = 0;
            }
-           else                /* just take it */
-               *tp++ = *hp;
+           else if (*hp == '"')        /* quoted word, copy verbatim */
+           {
+               oldstate = state;
+               state = INSIDE_DQUOTE;
+                address[NEXTTP()] = *hp;
+            }
+           else if (!isspace((unsigned char)*hp))      /* just take it, ignoring whitespace */
+               address[NEXTTP()] = *hp;
            break;
 
        case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
-           if (*hp == '\n')            /* premature end of string */
-           {
-               state = ENDIT_ALL;
-               return(NULL);
-           }
-           else if (*hp == '\\')       /* handle RFC822 escaping */
-           {
-               *tp++ = *hp++;                  /* take the escape */
-               *tp++ = *hp;                    /* take following char */
-           }
-           else if (*hp != '"')
-               *tp++ = *hp;
-           else
-           {
-               *tp++ = *hp;
+           address[NEXTTP()] = *hp;
+           if (*hp == '"')
                state = oldstate;
-           }
            break;
 
        case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
-           if (*hp == '\n')            /* end of line, just bomb out */
-               return(NULL);
-           else if (*hp == '\\')       /* handle RFC822 escaping */
-           {
-               *tp++ = *hp++;                  /* take the escape */
-               *tp++ = *hp;                    /* take following char */
-           }
-           else if (*hp == '(')
+           if (*hp == '(')
                ++parendepth;
            else if (*hp == ')')
                --parendepth;
            if (parendepth == 0)
-               state = SKIP_JUNK;
+               state = oldstate;
            break;
 
        case INSIDE_BRACKETS:   /* possible <>-enclosed address */
-           if (*hp == '\\')            /* handle RFC822 escaping */
-           {
-               *tp++ = *hp++;                  /* take the escape */
-               *tp++ = *hp;                    /* take following char */
-           }
-           else if (*hp == '>')        /* end of address */
+           if (*hp == '>')     /* end of address */
            {
-               *tp++ = '\0';
+               address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                ++hp;
-               return(tp = address);
+               tp = 0;
+               return(address);
            }
            else if (*hp == '<')        /* nested <> */
-               tp = address;
+               tp = 0;
            else if (*hp == '"')        /* quoted address */
            {
-               *tp++ = *hp;
+               address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else                        /* just copy address */
-               *tp++ = *hp;
-           break;
-
-       case ENDIT_ALL:         /* after last address */
-           return(NULL);
+               address[NEXTTP()] = *hp;
            break;
        }
     }
@@ -337,38 +400,73 @@ const char *hdr;  /* header to be parsed, NUL to continue previous hdr */
     return(NULL);
 }
 
-#ifdef TESTMAIN
-main(int argc, char *argv[])
+#ifdef MAIN
+static void parsebuf(char *longbuf, int reply)
 {
-    char       buf[POPBUFSIZE], *cp;
-    int                reply =  (argc > 1 && !strcmp(argv[1], "-r"));
+    char       *cp;
+    size_t     dummy;
 
-    while (fgets(buf, sizeof(buf)-1, stdin))
+    if (reply)
     {
-       if (strncmp("From: ", buf, 6)
-                   && strncmp("To: ", buf, 4)
-                   && strncmp("Reply-", buf, 6)
-                   && strncmp("Cc: ", buf, 4)
-                   && strncmp("Bcc: ", buf, 5))
-           continue;
-       else
+       reply_hack(longbuf, "HOSTNAME.NET", &dummy);
+       printf("Rewritten buffer: %s", (char *)longbuf);
+    }
+    else
+       if ((cp = nxtaddr(longbuf)) != (char *)NULL)
+           do {
+               printf("\t-> \"%s\"\n", (char *)cp);
+           } while
+               ((cp = nxtaddr((char *)NULL)) != (char *)NULL);
+}
+
+
+
+int main(int argc, char *argv[])
+{
+    char       buf[BUFSIZ], longbuf[BUFSIZ];
+    int                ch, reply;
+    
+    verbose = reply = FALSE;
+    while ((ch = getopt(argc, argv, "rv")) != EOF)
+       switch(ch)
        {
-           fputs(buf, stdout);
-           if (reply)
-           {
-               reply_hack(buf, "HOSTNAME.NET");
-               printf("Rewritten buffer: %s", buf);
-           }
-           else
-               if ((cp = nxtaddr(buf)) != (char *)NULL)
-                   do {
-                       printf("\t%s\n", cp);
-                   } while
-                       ((cp = nxtaddr((char *)NULL)) != (char *)NULL);
+       case 'r':
+           reply = TRUE;
+           break;
+
+       case 'v':
+           verbose = TRUE;
+           break;
        }
 
+    longbuf[0] = '\0';
+
+    while (fgets(buf, sizeof(buf)-1, stdin))
+    {
+       if (buf[0] == ' ' || buf[0] == '\t')
+           strlcat(longbuf, buf, sizeof(longbuf));
+       else if (!strncasecmp("From: ", buf, 6)
+                   || !strncasecmp("To: ", buf, 4)
+                   || !strncasecmp("Reply-", buf, 6)
+                   || !strncasecmp("Cc: ", buf, 4)
+                   || !strncasecmp("Bcc: ", buf, 5))
+           strlcpy(longbuf, buf, sizeof(longbuf));
+       else if (longbuf[0])
+       {
+           if (verbose)
+               fputs(longbuf, stdout);
+           parsebuf(longbuf, reply);
+           longbuf[0] = '\0';
+       }
+    }
+    if (longbuf[0])
+    {
+       if (verbose)
+           fputs(longbuf, stdout);
+       parsebuf(longbuf, reply);
     }
+    exit(0);
 }
-#endif /* TESTMAIN */
+#endif /* MAIN */
 
 /* rfc822.c end */