X-Git-Url: http://pileus.org/git/?a=blobdiff_plain;f=rfc822.c;h=4efba8349eb6a79c494cae0d95aa54e9b72fb7dc;hb=98cfcef26048bba06975e68a1aad05a8bac0d65d;hp=8497c54ba3464d9e69f2bb2edf54d7fefc6a5940;hpb=cfcf10f7b0ed01ef10abb87fb172e2a9ce37d9fd;p=~andy%2Ffetchmail diff --git a/rfc822.c b/rfc822.c index 8497c54b..4efba834 100644 --- a/rfc822.c +++ b/rfc822.c @@ -1,167 +1,248 @@ -/* - * rfc822.c -- code for slicing and dicing RFC822 mail headers - * - * Copyright 1996 by Eric S. Raymond - * All rights reserved. - * For license terms, see the file COPYING in this directory. - */ +/***************************************************************************** + +NAME: + rfc822.c -- code for slicing and dicing RFC822 mail headers + +ENTRY POINTS: + nxtaddr() -- parse the next address out of an RFC822 header + reply_hack() -- append hostname to local header addresses + +THEORY: + How to parse RFC822 headers in C. This is not a fully conformant +implementation of RFC822 or RFC2822, but it has been in production use +in a widely-deployed MTA (fetchmail) since 1996 without complaints. +Really perverse combinations of quoting and commenting could break it. + +AUTHOR: + Eric S. Raymond , 1997. This source code example +is part of fetchmail and the Unix Cookbook, and is released under the +MIT license. Compile with -DMAIN to build the demonstrator. 
+ +******************************************************************************/ + +#define _XOPEN_SOURCE 600 +#define __BSD_VISIBLE 1 + +#include "config.h" +#include "fetchmail.h" #include #include #include -#if defined(STDC_HEADERS) +#include #include + +#include "sdump.h" + +#ifndef MAIN +#include "gettext.h" +#else +#include +static int verbose; +const char *program_name = "rfc822"; +#endif /* MAIN */ + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 #endif -#include "fetchmail.h" +#define HEADER_END(p) ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t')) -void reply_hack(buf, host) +#define BEFORE_EOL(s) (strcspn((s), "\r\n")) + +char *reply_hack( + char *buf /* header to be hacked */, + const char *host /* server hostname */, + size_t *length) /* hack message headers so replies will work properly */ -char *buf; /* header to be hacked */ -const char *host; /* server hostname */ { - const char *from; - int parendepth, state = 0, tokencount = 0; - char mycopy[POPBUFSIZE+1]; - - if (strncmp("From: ", buf, 6) - && strncmp("To: ", buf, 4) - && strncmp("Reply-", buf, 6) - && strncmp("Cc: ", buf, 4) - && strncmp("Bcc: ", buf, 5)) { - return; + char *from, *cp, last_nws = '\0', *parens_from = NULL; + int parendepth, state, has_bare_name_part, has_host_part; +#ifndef MAIN + int addresscount = 1; +#endif /* MAIN */ + + if (strncasecmp("From:", buf, 5) + && strncasecmp("To:", buf, 3) + && strncasecmp("Reply-To:", buf, 9) + && strncasecmp("Return-Path:", buf, 12) + && strncasecmp("Cc:", buf, 3) + && strncasecmp("Bcc:", buf, 4) + && strncasecmp("Resent-From:", buf, 12) + && strncasecmp("Resent-To:", buf, 10) + && strncasecmp("Resent-Cc:", buf, 10) + && strncasecmp("Resent-Bcc:", buf, 11) + && strncasecmp("Apparently-From:", buf, 16) + && strncasecmp("Apparently-To:", buf, 14) + && strncasecmp("Sender:", buf, 7) + && strncasecmp("Resent-Sender:", buf, 14) + ) { + return(buf); } - strcpy(mycopy, buf); - for (from = mycopy; *from; from++) - { - switch (state) - { - case 0: 
/* before header colon */ - if (*from == ':') - state = 1; - break; - - case 1: /* we've seen the colon, we're looking for addresses */ - if (*from == '"') - state = 3; - else if (*from == '(') - { - parendepth = 1; - state = 4; - } - else if (*from == '<' || isalnum(*from)) - state = 5; - else if (isspace(*from)) - state = 2; - break; +#ifndef MAIN + if (outlevel >= O_DEBUG) { + report_build(stdout, GT_("About to rewrite %s...\n"), (cp = sdump(buf, BEFORE_EOL(buf)))); + xfree(cp); + } - case 2: /* found a token boundary -- reset without copying */ - if (*from != ' ' && *from != '\t') - { - tokencount++; - state = 1; - --from; - continue; - } + /* make room to hack the address; buf must be malloced */ + for (cp = buf; *cp; cp++) + if (*cp == ',' || isspace((unsigned char)*cp)) + addresscount++; + buf = (char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1); +#endif /* MAIN */ - case 3: /* we're in a quoted human name, copy and ignore */ - if (*from == '"') - state = 1; - break; + /* + * This is going to foo up on some ill-formed addresses. + * Note that we don't rewrite the fake address <> in order to + * avoid screwing up bounce suppression with a null Return-Path. + */ - case 4: /* we're in a parenthesized human name, copy and ignore */ + parendepth = state = 0; + has_host_part = has_bare_name_part = FALSE; + for (from = buf; *from; from++) + { +#ifdef MAIN + if (verbose) + { + printf("state %d: %s", state, buf); + printf("%*s^\n", (int)(from - buf + 10), " "); + } +#endif /* MAIN */ + if (state != 2) + { if (*from == '(') ++parendepth; else if (*from == ')') --parendepth; - if (parendepth == 0) - state = 1; - break; + } - case 5: /* the real work gets done here */ - /* - * We're in something that might be an address part, - * either a bare unquoted/unparenthesized text or text - * enclosed in <> as per RFC822. 
- */ - /* if the address part contains an @, don't mess with it */ - if (*from == '@') - state = 6; - - /* If the address token is not properly terminated, ignore it. */ - else if (*from == ' ' || *from == '\t') + if (!parendepth && !has_host_part) + switch (state) { - const char *cp; - + case 0: /* before header colon */ + if (*from == ':') + state = 1; + break; + + case 1: /* we've seen the colon, we're looking for addresses */ + if (!isspace((unsigned char)*from)) + last_nws = *from; + if (*from == '<') + state = 3; + else if (*from == '@' || *from == '!') + has_host_part = TRUE; + else if (*from == '"') + state = 2; /* - * The only lookahead case. If we're looking at space or tab, - * we might be looking at a local name immediately followed - * by a human name. + * Not expanding on last non-WS == ';' deals with groupnames, + * an obscure misfeature described in sections + * 6.1, 6.2.6, and A.1.5 of the RFC822 standard. */ - for (cp = from; isspace(*cp); cp++) - continue; - if (*cp == '(') + else if ((*from == ',' || HEADER_END(from)) + && has_bare_name_part + && !has_host_part + && last_nws != ';') { - strcpy(buf, "@"); - strcat(buf, host); - buf += strlen(buf); - state = 1; + int hostlen; + char *p; + + p = from; + if (parens_from) + from = parens_from; + while (isspace((unsigned char)*from) || (*from == ',')) + --from; + from++; + hostlen = strlen(host); + for (cp = from + strlen(from); cp >= from; --cp) + cp[hostlen+1] = *cp; + *from++ = '@'; + memcpy(from, host, hostlen); + from = p + hostlen + 1; + has_host_part = TRUE; + } + else if (from[1] == '(' + && has_bare_name_part + && !has_host_part + && last_nws != ';' && last_nws != ')') + { + parens_from = from; + } + else if (!isspace((unsigned char)*from)) + has_bare_name_part = TRUE; + break; + + case 2: /* we're in a string */ + if (*from == '"') + { + char *bp; + int bscount; + + bscount = 0; + for (bp = from - 1; *bp == '\\'; bp--) + bscount++; + if (!(bscount % 2)) + state = 1; } - } - - /* - * On 
proper termination with no @, insert hostname. - * Case '>' catches <>-enclosed mail IDs. Case ',' catches - * comma-separated bare IDs. - */ - else if (strchr(">,", *from)) - { - strcpy(buf, "@"); - strcat(buf, host); - buf += strlen(buf); - tokencount = 0; - state = 1; - } + break; - /* a single local name alone on the line */ - else if (*from == '\n' && tokencount == 1) - { - strcpy(buf, "@"); - strcat(buf, host); - buf += strlen(buf); - state = 2; + case 3: /* we're in a <>-enclosed address */ + if (*from == '@' || *from == '!') + has_host_part = TRUE; + else if (*from == '>' && (from > buf && from[-1] != '<')) + { + state = 1; + if (!has_host_part) + { + int hostlen; + + hostlen = strlen(host); + for (cp = from + strlen(from); cp >= from; --cp) + cp[hostlen+1] = *cp; + *from++ = '@'; + memcpy(from, host, hostlen); + from += hostlen; + has_host_part = TRUE; + } + } + break; } - /* everything else, including alphanumerics, just passes through */ - break; - - case 6: /* we're in a remote mail ID, no need to append hostname */ - if (*from == '>' || *from == ',' || isspace(*from)) - state = 1; - break; + /* + * If we passed a comma, reset everything. 
+ */ + if ((from > buf && from[-1] == ',') && !parendepth) { + has_host_part = has_bare_name_part = FALSE; + parens_from = NULL; } + } - /* all characters from the old buffer get copied to the new one */ - *buf++ = *from; +#ifndef MAIN + if (outlevel >= O_DEBUG) { + report_complete(stdout, GT_("...rewritten version is %s.\n"), + (cp = sdump(buf, BEFORE_EOL(buf)))); + xfree(cp) } - *buf++ = '\0'; + +#endif /* MAIN */ + *length = strlen(buf); + return(buf); } -char *nxtaddr(hdr) +char *nxtaddr(const char *hdr /* header to be parsed, NUL to continue previous hdr */) /* parse addresses in succession out of a specified RFC822 header */ -const char *hdr; /* header to be parsed, NUL to continue previous hdr */ { - static char *tp, address[POPBUFSIZE+1]; + static char address[BUFSIZ]; + static size_t tp; static const char *hp; static int state, oldstate; - int parendepth; +#ifdef MAIN + static const char *orighdr; +#endif /* MAIN */ + int parendepth = 0; - /* - * Note 1: RFC822 escaping with \ is *not* handled. Note 2: it is - * important that this routine not stop on \r, since we use \r as - * a marker for RFC822 continuations elsewhere. - */ #define START_HDR 0 /* before header colon */ #define SKIP_JUNK 1 /* skip whitespace, \n, and junk */ #define BARE_ADDRESS 2 /* collecting address without delimiters */ @@ -170,130 +251,151 @@ const char *hdr; /* header to be parsed, NUL to continue previous hdr */ #define INSIDE_BRACKETS 5 /* inside bracketed address */ #define ENDIT_ALL 6 /* after last address */ +#define NEXTTP() ((tp < sizeof(address)-1) ? 
tp++ : tp) + if (hdr) { hp = hdr; state = START_HDR; +#ifdef MAIN + orighdr = hdr; +#endif /* MAIN */ + tp = 0; } + if (!hp) return NULL; + for (; *hp; hp++) { - switch (state) +#ifdef MAIN + if (verbose) { - case START_HDR: /* before header colon */ - if (*hp == '\n') + printf("state %d: %s", state, orighdr); + printf("%*s^\n", (int)(hp - orighdr + 10), " "); + } +#endif /* MAIN */ + + if (state == ENDIT_ALL) /* after last address */ + return(NULL); + else if (HEADER_END(hp)) + { + state = ENDIT_ALL; + if (tp) { - state = ENDIT_ALL; - return(NULL); + while (tp > 0 && isspace((unsigned char)address[tp - 1])) + tp--; + address[tp] = '\0'; + tp = 0; + return (address); } - else if (*hp == ':') + return(NULL); + } + else if (*hp == '\\') /* handle RFC822 escaping */ + { + if (state != INSIDE_PARENS) { - state = SKIP_JUNK; - tp = address; + address[NEXTTP()] = *hp++; /* take the escape */ + address[NEXTTP()] = *hp; /* take following unsigned char */ } + } + else switch (state) + { + case START_HDR: /* before header colon */ + if (*hp == ':') + state = SKIP_JUNK; break; case SKIP_JUNK: /* looking for address start */ - if (*hp == '\n') /* no more addresses */ - { - state = ENDIT_ALL; - return(NULL); - } - else if (*hp == '"') /* quoted string */ + if (*hp == '"') /* quoted string */ { oldstate = SKIP_JUNK; state = INSIDE_DQUOTE; - *tp++ = *hp; + address[NEXTTP()] = *hp; } else if (*hp == '(') /* address comment -- ignore */ { parendepth = 1; + oldstate = SKIP_JUNK; state = INSIDE_PARENS; } else if (*hp == '<') /* begin
*/ { state = INSIDE_BRACKETS; - tp = address; + tp = 0; } - else if (!isspace(*hp)) /* ignore space */ + else if (*hp != ',' && !isspace((unsigned char)*hp)) { --hp; state = BARE_ADDRESS; } break; - case BARE_ADDRESS: /* collecting address without delimiters */ - if (*hp == '\n') /* end of bare address */ - { - *tp++ = '\0'; - state = ENDIT_ALL; - return(tp = address); - } - else if (*hp == ',' || isspace(*hp)) /* end of address */ + case BARE_ADDRESS: /* collecting address without delimiters */ + if (*hp == ',') /* end of address */ { - if (tp > address) + if (tp) { - *tp++ = '\0'; - ++hp; + address[NEXTTP()] = '\0'; state = SKIP_JUNK; - return(tp = address); + tp = 0; + return(address); } } - else /* just take it */ + else if (*hp == '(') /* beginning of comment */ { - state = BARE_ADDRESS; - *tp++ = *hp; + parendepth = 1; + oldstate = BARE_ADDRESS; + state = INSIDE_PARENS; } - break; - - case INSIDE_DQUOTE: /* we're in a quoted string, copy verbatim */ - if (*hp == '\n') + else if (*hp == '<') /* beginning of real address */ { - state = ENDIT_ALL; - return(NULL); + state = INSIDE_BRACKETS; + tp = 0; } - if (*hp != '"') - *tp++ = *hp; - else + else if (*hp == '"') /* quoted word, copy verbatim */ { - *tp++ = *hp; + oldstate = state; + state = INSIDE_DQUOTE; + address[NEXTTP()] = *hp; + } + else if (!isspace((unsigned char)*hp)) /* just take it, ignoring whitespace */ + address[NEXTTP()] = *hp; + break; + + case INSIDE_DQUOTE: /* we're in a quoted string, copy verbatim */ + address[NEXTTP()] = *hp; + if (*hp == '"') state = oldstate; - } break; - case INSIDE_PARENS: /* we're in a parenthesized comment, ignore */ - if (*hp == '\n') - return(NULL); - else if (*hp == '(') + case INSIDE_PARENS: /* we're in a parenthesized comment, ignore */ + if (*hp == '(') ++parendepth; else if (*hp == ')') --parendepth; if (parendepth == 0) - state = SKIP_JUNK; + state = oldstate; break; - case INSIDE_BRACKETS: /* possible <>-enclosed address */ - if (*hp == '>') /* end of address 
*/ + case INSIDE_BRACKETS: /* possible <>-enclosed address */ + if (*hp == '>') /* end of address */ { - *tp++ = '\0'; + address[NEXTTP()] = '\0'; state = SKIP_JUNK; ++hp; - return(tp = address); + tp = 0; + return(address); } - else if (*hp == '<') /* nested <> */ - tp = address; - else if (*hp == '"') /* quoted address */ + else if (*hp == '<') /* nested <> */ + tp = 0; + else if (*hp == '"') /* quoted address */ { - *tp++ = *hp; + address[NEXTTP()] = *hp; oldstate = INSIDE_BRACKETS; state = INSIDE_DQUOTE; } - else /* just copy address */ - *tp++ = *hp; - break; - - case ENDIT_ALL: /* after last address */ - return(NULL); + else /* just copy address */ + address[NEXTTP()] = *hp; break; } } @@ -301,31 +403,73 @@ const char *hdr; /* header to be parsed, NUL to continue previous hdr */ return(NULL); } -#ifdef TESTMAIN -main(int argc, char *argv[]) +#ifdef MAIN +static void parsebuf(char *longbuf, int reply) { - char buf[POPBUFSIZE], *cp; + char *cp; + size_t dummy; - while (fgets(buf, sizeof(buf)-1, stdin)) + if (reply) { - if (strncmp("From: ", buf, 6) - && strncmp("To: ", buf, 4) - && strncmp("Reply-", buf, 6) - && strncmp("Cc: ", buf, 4) - && strncmp("Bcc: ", buf, 5)) - continue; - else + reply_hack(longbuf, "HOSTNAME.NET", &dummy); + printf("Rewritten buffer: %s", (char *)longbuf); + } + else + if ((cp = nxtaddr(longbuf)) != (char *)NULL) + do { + printf("\t-> \"%s\"\n", (char *)cp); + } while + ((cp = nxtaddr((char *)NULL)) != (char *)NULL); +} + + + +int main(int argc, char *argv[]) +{ + char buf[BUFSIZ], longbuf[BUFSIZ]; + int ch, reply; + + verbose = reply = FALSE; + while ((ch = getopt(argc, argv, "rv")) != EOF) + switch(ch) { - fputs(buf, stdout); - if ((cp = nxtaddr(buf)) != (char *)NULL) - do { - printf("%s\n", cp); - } while - ((cp = nxtaddr((char *)NULL)) != (char *)NULL); + case 'r': + reply = TRUE; + break; + + case 'v': + verbose = TRUE; + break; } + longbuf[0] = '\0'; + + while (fgets(buf, sizeof(buf)-1, stdin)) + { + if (buf[0] == ' ' || buf[0] == 
'\t') + strlcat(longbuf, buf, sizeof(longbuf)); + else if (!strncasecmp("From: ", buf, 6) + || !strncasecmp("To: ", buf, 4) + || !strncasecmp("Reply-", buf, 6) + || !strncasecmp("Cc: ", buf, 4) + || !strncasecmp("Bcc: ", buf, 5)) + strlcpy(longbuf, buf, sizeof(longbuf)); + else if (longbuf[0]) + { + if (verbose) + fputs(longbuf, stdout); + parsebuf(longbuf, reply); + longbuf[0] = '\0'; + } + } + if (longbuf[0]) + { + if (verbose) + fputs(longbuf, stdout); + parsebuf(longbuf, reply); } + exit(0); } -#endif /* TESTMAIN */ +#endif /* MAIN */ /* rfc822.c end */