X-Git-Url: http://pileus.org/git/?a=blobdiff_plain;f=rfc822.c;h=4efba8349eb6a79c494cae0d95aa54e9b72fb7dc;hb=98cfcef26048bba06975e68a1aad05a8bac0d65d;hp=0976c701e1274363fbfb17ab6dc7a7c4adb1c0b7;hpb=b683ab33c98b903467407487a3c0f74c400f32ea;p=~andy%2Ffetchmail diff --git a/rfc822.c b/rfc822.c index 0976c701..4efba834 100644 --- a/rfc822.c +++ b/rfc822.c @@ -1,58 +1,123 @@ -/* - * rfc822.c -- code for slicing and dicing RFC822 mail headers - * - * Copyright 1997 by Eric S. Raymond - * For license terms, see the file COPYING in this directory. - */ +/***************************************************************************** + +NAME: + rfc822.c -- code for slicing and dicing RFC822 mail headers + +ENTRY POINTS: + nxtaddr() -- parse the next address out of an RFC822 header + reply_hack() -- append hostname to local header addresses + +THEORY: + How to parse RFC822 headers in C. This is not a fully conformant +implementation of RFC822 or RFC2822, but it has been in production use +in a widely-deployed MTA (fetchmail) since 1996 without complaints. +Really perverse combinations of quoting and commenting could break it. + +AUTHOR: + Eric S. Raymond , 1997. This source code example +is part of fetchmail and the Unix Cookbook, and is released under the +MIT license. Compile with -DMAIN to build the demonstrator. 
+ +******************************************************************************/ + +#define _XOPEN_SOURCE 600 +#define __BSD_VISIBLE 1 + +#include "config.h" +#include "fetchmail.h" #include #include #include -#if defined(STDC_HEADERS) +#include #include -#endif -#include "fetchmail.h" +#include "sdump.h" + +#ifndef MAIN +#include "gettext.h" +#else +#include +static int verbose; +const char *program_name = "rfc822"; +#endif /* MAIN */ + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif #define HEADER_END(p) ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t')) -#ifdef TESTMAIN -static int verbose; -#endif /* TESTMAIN */ +#define BEFORE_EOL(s) (strcspn((s), "\r\n")) -void reply_hack(buf, host) +char *reply_hack( + char *buf /* header to be hacked */, + const char *host /* server hostname */, + size_t *length) /* hack message headers so replies will work properly */ -char *buf; /* header to be hacked */ -const char *host; /* server hostname */ { - char *from, *cp; + char *from, *cp, last_nws = '\0', *parens_from = NULL; int parendepth, state, has_bare_name_part, has_host_part; +#ifndef MAIN + int addresscount = 1; +#endif /* MAIN */ + + if (strncasecmp("From:", buf, 5) + && strncasecmp("To:", buf, 3) + && strncasecmp("Reply-To:", buf, 9) + && strncasecmp("Return-Path:", buf, 12) + && strncasecmp("Cc:", buf, 3) + && strncasecmp("Bcc:", buf, 4) + && strncasecmp("Resent-From:", buf, 12) + && strncasecmp("Resent-To:", buf, 10) + && strncasecmp("Resent-Cc:", buf, 10) + && strncasecmp("Resent-Bcc:", buf, 11) + && strncasecmp("Apparently-From:", buf, 16) + && strncasecmp("Apparently-To:", buf, 14) + && strncasecmp("Sender:", buf, 7) + && strncasecmp("Resent-Sender:", buf, 14) + ) { + return(buf); + } - if (strncasecmp("From: ", buf, 6) - && strncasecmp("To: ", buf, 4) - && strncasecmp("Reply-To: ", buf, 10) - && strncasecmp("Return-Path: ", buf, 13) - && strncasecmp("Cc: ", buf, 4) - && strncasecmp("Bcc: ", buf, 5)) { - return; +#ifndef MAIN + if (outlevel >= 
O_DEBUG) { + report_build(stdout, GT_("About to rewrite %s...\n"), (cp = sdump(buf, BEFORE_EOL(buf)))); + xfree(cp); } + /* make room to hack the address; buf must be malloced */ + for (cp = buf; *cp; cp++) + if (*cp == ',' || isspace((unsigned char)*cp)) + addresscount++; + buf = (char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1); +#endif /* MAIN */ + + /* + * This is going to foo up on some ill-formed addresses. + * Note that we don't rewrite the fake address <> in order to + * avoid screwing up bounce suppression with a null Return-Path. + */ + parendepth = state = 0; has_host_part = has_bare_name_part = FALSE; for (from = buf; *from; from++) { -#ifdef TESTMAIN +#ifdef MAIN if (verbose) { printf("state %d: %s", state, buf); - printf("%*s^\n", from - buf + 10, " "); + printf("%*s^\n", (int)(from - buf + 10), " "); } -#endif /* TESTMAIN */ +#endif /* MAIN */ if (state != 2) + { if (*from == '(') ++parendepth; else if (*from == ')') --parendepth; + } if (!parendepth && !has_host_part) switch (state) @@ -63,22 +128,31 @@ const char *host; /* server hostname */ break; case 1: /* we've seen the colon, we're looking for addresses */ + if (!isspace((unsigned char)*from)) + last_nws = *from; if (*from == '<') state = 3; - else if (*from == '@') + else if (*from == '@' || *from == '!') has_host_part = TRUE; else if (*from == '"') state = 2; /* - * Not expanding on from[-1] == ';' deals with groupnames, + * Not expanding on last non-WS == ';' deals with groupnames, * an obscure misfeature described in sections * 6.1, 6.2.6, and A.1.5 of the RFC822 standard. 
*/ - else if ((*from == ',' || HEADER_END(from)) && has_bare_name_part && !has_host_part && from[-1] != ';') + else if ((*from == ',' || HEADER_END(from)) + && has_bare_name_part + && !has_host_part + && last_nws != ';') { int hostlen; + char *p; - while (isspace(*from)) + p = from; + if (parens_from) + from = parens_from; + while (isspace((unsigned char)*from) || (*from == ',')) --from; from++; hostlen = strlen(host); @@ -86,48 +160,87 @@ const char *host; /* server hostname */ cp[hostlen+1] = *cp; *from++ = '@'; memcpy(from, host, hostlen); - from += strlen(from); + from = p + hostlen + 1; has_host_part = TRUE; } - else if (!isspace(*from)) + else if (from[1] == '(' + && has_bare_name_part + && !has_host_part + && last_nws != ';' && last_nws != ')') + { + parens_from = from; + } + else if (!isspace((unsigned char)*from)) has_bare_name_part = TRUE; break; case 2: /* we're in a string */ if (*from == '"') - state = 1; + { + char *bp; + int bscount; + + bscount = 0; + for (bp = from - 1; *bp == '\\'; bp--) + bscount++; + if (!(bscount % 2)) + state = 1; + } break; case 3: /* we're in a <>-enclosed address */ - if (*from == '@') + if (*from == '@' || *from == '!') has_host_part = TRUE; - else if (*from == '>' && !has_host_part) + else if (*from == '>' && (from > buf && from[-1] != '<')) { - int hostlen; - - hostlen = strlen(host); - for (cp = from + strlen(from); cp >= from; --cp) - cp[hostlen+1] = *cp; - *from++ = '@'; - memcpy(from, host, hostlen); - from += strlen(from); - has_host_part = TRUE; + state = 1; + if (!has_host_part) + { + int hostlen; + + hostlen = strlen(host); + for (cp = from + strlen(from); cp >= from; --cp) + cp[hostlen+1] = *cp; + *from++ = '@'; + memcpy(from, host, hostlen); + from += hostlen; + has_host_part = TRUE; + } } break; } + + /* + * If we passed a comma, reset everything. 
+ */ + if ((from > buf && from[-1] == ',') && !parendepth) { + has_host_part = has_bare_name_part = FALSE; + parens_from = NULL; + } + } + +#ifndef MAIN + if (outlevel >= O_DEBUG) { + report_complete(stdout, GT_("...rewritten version is %s.\n"), + (cp = sdump(buf, BEFORE_EOL(buf)))); + xfree(cp) } + +#endif /* MAIN */ + *length = strlen(buf); + return(buf); } -char *nxtaddr(hdr) +char *nxtaddr(const char *hdr /* header to be parsed, NUL to continue previous hdr */) /* parse addresses in succession out of a specified RFC822 header */ -const char *hdr; /* header to be parsed, NUL to continue previous hdr */ { - static char *tp, address[POPBUFSIZE+1]; + static char address[BUFSIZ]; + static size_t tp; static const char *hp; static int state, oldstate; -#ifdef TESTMAIN +#ifdef MAIN static const char *orighdr; -#endif /* TESTMAIN */ +#endif /* MAIN */ int parendepth = 0; #define START_HDR 0 /* before header colon */ @@ -138,42 +251,51 @@ const char *hdr; /* header to be parsed, NUL to continue previous hdr */ #define INSIDE_BRACKETS 5 /* inside bracketed address */ #define ENDIT_ALL 6 /* after last address */ +#define NEXTTP() ((tp < sizeof(address)-1) ? tp++ : tp) + if (hdr) { hp = hdr; state = START_HDR; -#ifdef TESTMAIN +#ifdef MAIN orighdr = hdr; -#endif /* TESTMAIN */ - tp = address; +#endif /* MAIN */ + tp = 0; } + if (!hp) return NULL; + for (; *hp; hp++) { -#ifdef TESTMAIN +#ifdef MAIN if (verbose) { printf("state %d: %s", state, orighdr); - printf("%*s^\n", hp - orighdr + 10, " "); + printf("%*s^\n", (int)(hp - orighdr + 10), " "); } -#endif /* TESTMAIN */ +#endif /* MAIN */ if (state == ENDIT_ALL) /* after last address */ return(NULL); else if (HEADER_END(hp)) { state = ENDIT_ALL; - while (isspace(*--tp)) - continue; - *++tp = '\0'; - return(tp > address ? 
(tp = address) : (char *)NULL); + if (tp) + { + while (tp > 0 && isspace((unsigned char)address[tp - 1])) + tp--; + address[tp] = '\0'; + tp = 0; + return (address); + } + return(NULL); } else if (*hp == '\\') /* handle RFC822 escaping */ { if (state != INSIDE_PARENS) { - *tp++ = *hp++; /* take the escape */ - *tp++ = *hp; /* take following char */ + address[NEXTTP()] = *hp++; /* take the escape */ + address[NEXTTP()] = *hp; /* take following unsigned char */ } } else switch (state) @@ -188,7 +310,7 @@ const char *hdr; /* header to be parsed, NUL to continue previous hdr */ { oldstate = SKIP_JUNK; state = INSIDE_DQUOTE; - *tp++ = *hp; + address[NEXTTP()] = *hp; } else if (*hp == '(') /* address comment -- ignore */ { @@ -199,9 +321,9 @@ const char *hdr; /* header to be parsed, NUL to continue previous hdr */ else if (*hp == '<') /* begin
*/ { state = INSIDE_BRACKETS; - tp = address; + tp = 0; } - else if (*hp != ',' && !isspace(*hp)) + else if (*hp != ',' && !isspace((unsigned char)*hp)) { --hp; state = BARE_ADDRESS; @@ -211,11 +333,12 @@ const char *hdr; /* header to be parsed, NUL to continue previous hdr */ case BARE_ADDRESS: /* collecting address without delimiters */ if (*hp == ',') /* end of address */ { - if (tp > address) + if (tp) { - *tp++ = '\0'; + address[NEXTTP()] = '\0'; state = SKIP_JUNK; - return(tp = address); + tp = 0; + return(address); } } else if (*hp == '(') /* beginning of comment */ @@ -227,20 +350,22 @@ const char *hdr; /* header to be parsed, NUL to continue previous hdr */ else if (*hp == '<') /* beginning of real address */ { state = INSIDE_BRACKETS; - tp = address; + tp = 0; } - else if (!isspace(*hp)) /* just take it, ignoring whitespace */ - *tp++ = *hp; + else if (*hp == '"') /* quoted word, copy verbatim */ + { + oldstate = state; + state = INSIDE_DQUOTE; + address[NEXTTP()] = *hp; + } + else if (!isspace((unsigned char)*hp)) /* just take it, ignoring whitespace */ + address[NEXTTP()] = *hp; break; case INSIDE_DQUOTE: /* we're in a quoted string, copy verbatim */ - if (*hp != '"') - *tp++ = *hp; - else - { - *tp++ = *hp; + address[NEXTTP()] = *hp; + if (*hp == '"') state = oldstate; - } break; case INSIDE_PARENS: /* we're in a parenthesized comment, ignore */ @@ -255,21 +380,22 @@ const char *hdr; /* header to be parsed, NUL to continue previous hdr */ case INSIDE_BRACKETS: /* possible <>-enclosed address */ if (*hp == '>') /* end of address */ { - *tp++ = '\0'; + address[NEXTTP()] = '\0'; state = SKIP_JUNK; ++hp; - return(tp = address); + tp = 0; + return(address); } else if (*hp == '<') /* nested <> */ - tp = address; + tp = 0; else if (*hp == '"') /* quoted address */ { - *tp++ = *hp; + address[NEXTTP()] = *hp; oldstate = INSIDE_BRACKETS; state = INSIDE_DQUOTE; } else /* just copy address */ - *tp++ = *hp; + address[NEXTTP()] = *hp; break; } } @@ -277,29 +403,30 
@@ const char *hdr; /* header to be parsed, NUL to continue previous hdr */ return(NULL); } -#ifdef TESTMAIN +#ifdef MAIN static void parsebuf(char *longbuf, int reply) { char *cp; + size_t dummy; if (reply) { - reply_hack(longbuf, "HOSTNAME.NET"); - printf("Rewritten buffer: %s", longbuf); + reply_hack(longbuf, "HOSTNAME.NET", &dummy); + printf("Rewritten buffer: %s", (char *)longbuf); } else if ((cp = nxtaddr(longbuf)) != (char *)NULL) do { - printf("\t-> \"%s\"\n", cp); + printf("\t-> \"%s\"\n", (char *)cp); } while ((cp = nxtaddr((char *)NULL)) != (char *)NULL); } -main(int argc, char *argv[]) +int main(int argc, char *argv[]) { - char buf[MSGBUFSIZE], longbuf[BUFSIZ]; + char buf[BUFSIZ], longbuf[BUFSIZ]; int ch, reply; verbose = reply = FALSE; @@ -315,16 +442,18 @@ main(int argc, char *argv[]) break; } + longbuf[0] = '\0'; + while (fgets(buf, sizeof(buf)-1, stdin)) { if (buf[0] == ' ' || buf[0] == '\t') - strcat(longbuf, buf); + strlcat(longbuf, buf, sizeof(longbuf)); else if (!strncasecmp("From: ", buf, 6) || !strncasecmp("To: ", buf, 4) || !strncasecmp("Reply-", buf, 6) || !strncasecmp("Cc: ", buf, 4) || !strncasecmp("Bcc: ", buf, 5)) - strcpy(longbuf, buf); + strlcpy(longbuf, buf, sizeof(longbuf)); else if (longbuf[0]) { if (verbose) @@ -339,7 +468,8 @@ main(int argc, char *argv[]) fputs(longbuf, stdout); parsebuf(longbuf, reply); } + exit(0); } -#endif /* TESTMAIN */ +#endif /* MAIN */ /* rfc822.c end */