/*
* rfc822.c -- code for slicing and dicing RFC822 mail headers
*
- * Copyright 1996 by Eric S. Raymond
- * All rights reserved.
+ * Copyright 1997 by Eric S. Raymond
* For license terms, see the file COPYING in this directory.
*/
#define HEADER_END(p) ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))
-void reply_hack(buf, host)
+#ifdef TESTMAIN
+static int verbose; /* test driver only: nonzero enables the state-trace output */
+#endif /* TESTMAIN */
+
+char *reply_hack(buf, host) /* returns buf, which xrealloc() may have moved */
/* hack message headers so replies will work properly */
char *buf; /* header to be hacked */
const char *host; /* server hostname */
{
- const char *from;
- int parendepth, state, has_host_part;
- char mycopy[MSGBUFSIZE+1];
-
- if (strncmp("From: ", buf, 6)
- && strncmp("To: ", buf, 4)
- && strncmp("Reply-", buf, 6)
- && strncmp("Cc: ", buf, 4)
- && strncmp("Bcc: ", buf, 5)) {
- return;
+ char *from, *cp, last_nws = '\0'; /* last_nws: most recent non-whitespace char seen in state 1 */
+ int parendepth, state, has_bare_name_part, has_host_part;
+ int addresscount = 1; /* used only to size the realloc below */
+
+ if (strncasecmp("From: ", buf, 6)
+ && strncasecmp("To: ", buf, 4)
+ && strncasecmp("Reply-To: ", buf, 10)
+ && strncasecmp("Return-Path: ", buf, 13)
+ && strncasecmp("Cc: ", buf, 4)
+ && strncasecmp("Bcc: ", buf, 5)
+ && strncasecmp("Resent-From: ", buf, 13)
+ && strncasecmp("Resent-To: ", buf, 11)
+ && strncasecmp("Resent-Cc: ", buf, 11)
+ && strncasecmp("Resent-Bcc: ", buf, 12)
+ && strncasecmp("Apparently-From:", buf, 16)
+ && strncasecmp("Apparently-To:", buf, 14)
+ && strncasecmp("Sender:", buf, 7)
+ && strncasecmp("Resent-Sender:", buf, 14)
+ ) {
+ return(buf); /* not one of the listed address headers: leave it untouched */
}
- strcpy(mycopy, buf);
+#ifndef TESTMAIN
+ /*
+ * Make room to hack the address; buf must be malloced.
+ * Every comma and whitespace character bumps addresscount, and the
+ * buffer is resized to strlen(buf) + addresscount * strlen(host) + 1.
+ * NOTE(review): each insertion below writes an at-sign plus host,
+ * i.e. strlen(host) + 1 bytes, while only strlen(host) is budgeted
+ * per counted separator -- presumably the whitespace over-count
+ * covers the extra byte; confirm for very short host names.
+ * NOTE(review): under TESTMAIN this resize is compiled out, so the
+ * in-place insertion relies on slack in the caller-supplied buffer.
+ */
+ for (cp = buf; *cp; cp++)
+ if (*cp == ',' || isspace(*cp))
+ addresscount++;
+ buf = (char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
+#endif /* TESTMAIN */
+
+ /*
+ * States: 0 = before the header colon, 1 = scanning for addresses,
+ * 2 = inside a double-quoted string, 3 = inside a <>-enclosed address.
+ *
+ * This will mishandle some ill-formed addresses. For example,
+ * "From: John Smith (Systems) <jsmith@domain>" will get rewritten as
+ * "From: John Smith@my.pop.server (Systems) <jsmith@domain>" because
+ * the state machine can't look ahead to the <> part past the comment
+ * and instead treats `John Smith' as a bareword address.
+ *
+ * NOTE(review): the switch below is skipped once has_host_part is set,
+ * so the closing > of an address like <user@host> is never examined
+ * and state appears to remain 3 for the addresses that follow the
+ * next comma -- confirm whether that is intended.
+ */
+
parendepth = state = 0;
- has_host_part = FALSE;
- for (from = mycopy; *from; from++)
+ has_host_part = has_bare_name_part = FALSE;
+ for (from = buf; *from; from++)
{
#ifdef TESTMAIN
- printf("state %d: %s", state, mycopy);
- printf("%*s^\n", from - mycopy + 10, " ");
+ if (verbose)
+ {
+ printf("state %d: %s", state, buf);
+ printf("%*s^\n", from - buf + 10, " ");
+ }
#endif /* TESTMAIN */
- if (*from == '(')
- ++parendepth;
- else if (*from == ')')
- --parendepth;
+ if (state != 2) /* parentheses inside a quoted string (state 2) are not counted */
+ if (*from == '(')
+ ++parendepth;
+ else if (*from == ')')
+ --parendepth;
- if (!parendepth)
+ if (!parendepth && !has_host_part) /* outside comments, and no host seen yet for this address */
switch (state)
{
- case 0: /* before header colon */
+ case 0: /* before header colon */
if (*from == ':')
state = 1;
break;
- case 1: /* we've seen the colon, we're looking for addresses */
+ case 1: /* we've seen the colon, we're looking for addresses */
+ if (!isspace(*from))
+ last_nws = *from;
if (*from == '<')
- state = 2;
+ state = 3;
else if (*from == '@')
has_host_part = TRUE;
- else if ((*from == ',' || HEADER_END(from)) && !has_host_part)
+ else if (*from == '"')
+ state = 2;
+ /*
+ * A bare address ends at a comma, at the end of the header, or
+ * just before a `(' comment. It is expanded only if some name
+ * text was seen and no host part was found.
+ * Not expanding on last non-WS == ';' deals with groupnames,
+ * an obscure misfeature described in sections
+ * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
+ * Expansion is likewise suppressed when the last non-WS
+ * character is `)'.
+ */
+ else if ((*from == ',' || HEADER_END(from) || from[1] == '(')
+ && has_bare_name_part
+ && !has_host_part
+ && last_nws != ';' && last_nws != ')')
{
- while (isspace(*from))
+ int hostlen;
+
+ while (isspace(*from) || (*from == ',')) /* back up over the terminator and whitespace before it */
--from;
- while (isspace(*buf))
- --buf;
- strcpy(buf, "@");
- strcat(buf, host);
- buf += strlen(buf);
- }
+ from++; /* step to just past the end of the address text */
+ hostlen = strlen(host);
+ for (cp = from + strlen(from); cp >= from; --cp) /* open a gap of hostlen+1 bytes, moving the NUL too */
+ cp[hostlen+1] = *cp;
+ *from++ = '@';
+ memcpy(from, host, hostlen);
+ from += hostlen; /* continue scanning after the inserted host */
+ has_host_part = TRUE;
+ }
+ else if (!isspace(*from))
+ has_bare_name_part = TRUE;
+ break;
+
+ case 2: /* we're in a string */
+ if (*from == '"')
+ state = 1;
break;
- case 2: /* we're in a <>-enclosed address */
+ case 3: /* we're in a <>-enclosed address */
if (*from == '@')
has_host_part = TRUE;
- else if (*from == '>' && !has_host_part)
+ else if (*from == '>')
{
- strcpy(buf, "@");
- strcat(buf, host);
- buf += strlen(buf);
- has_host_part = TRUE;
+ state = 1; /* back to scanning for addresses */
+ if (!has_host_part)
+ {
+ int hostlen;
+
+ hostlen = strlen(host);
+ for (cp = from + strlen(from); cp >= from; --cp) /* open a gap of hostlen+1 bytes before the > */
+ cp[hostlen+1] = *cp;
+ *from++ = '@';
+ memcpy(from, host, hostlen);
+ from += hostlen; /* from now points at the shifted > */
+ has_host_part = TRUE;
+ }
}
break;
}
- /* all characters from the old buffer get copied to the new one */
- *buf++ = *from;
+ /*
+ * If the previous character was a comma outside any comment, a new
+ * address starts here: reset the per-address flags (state itself
+ * is not reset).
+ * NOTE(review): on the first iteration from == buf, so from[-1]
+ * reads one byte before the start of the buffer -- confirm.
+ */
+ if (from[-1] == ',' && !parendepth) {
+ has_host_part = has_bare_name_part = FALSE;
+ }
}
- *buf = '\0';
+ return(buf);
}
char *nxtaddr(hdr)
static char *tp, address[POPBUFSIZE+1];
static const char *hp;
static int state, oldstate;
- int parendepth;
+#ifdef TESTMAIN
+ static const char *orighdr;
+#endif /* TESTMAIN */
+ int parendepth = 0;
#define START_HDR 0 /* before header colon */
#define SKIP_JUNK 1 /* skip whitespace, \n, and junk */
{
hp = hdr;
state = START_HDR;
+#ifdef TESTMAIN
+ orighdr = hdr;
+#endif /* TESTMAIN */
+ tp = address;
}
for (; *hp; hp++)
{
- switch (state)
+#ifdef TESTMAIN
+ if (verbose)
{
- case START_HDR: /* before header colon */
- if (HEADER_END(hp))
+ printf("state %d: %s", state, orighdr);
+ printf("%*s^\n", hp - orighdr + 10, " ");
+ }
+#endif /* TESTMAIN */
+
+ if (state == ENDIT_ALL) /* after last address */
+ return(NULL);
+ else if (HEADER_END(hp))
+ {
+ state = ENDIT_ALL;
+ if (tp > address)
{
- state = ENDIT_ALL;
- return(NULL);
+ while (isspace(*--tp))
+ continue;
+ *++tp = '\0';
}
- else if (*hp == ':')
+ return(tp > address ? (tp = address) : (char *)NULL);
+ }
+ else if (*hp == '\\') /* handle RFC822 escaping */
+ {
+ if (state != INSIDE_PARENS)
{
- state = SKIP_JUNK;
- tp = address;
+ *tp++ = *hp++; /* take the escape */
+ *tp++ = *hp; /* take following char */
}
+ }
+ else switch (state)
+ {
+ case START_HDR: /* before header colon */
+ if (*hp == ':')
+ state = SKIP_JUNK;
break;
case SKIP_JUNK: /* looking for address start */
- if (HEADER_END(hp)) /* no more addresses */
- {
- state = ENDIT_ALL;
- return(NULL);
- }
- else if (*hp == '\\') /* handle RFC822 escaping */
- {
- *tp++ = *hp++; /* take the escape */
- *tp++ = *hp; /* take following char */
- }
- else if (*hp == '"') /* quoted string */
+ if (*hp == '"') /* quoted string */
{
oldstate = SKIP_JUNK;
state = INSIDE_DQUOTE;
else if (*hp == '(') /* address comment -- ignore */
{
parendepth = 1;
+ oldstate = SKIP_JUNK;
state = INSIDE_PARENS;
}
else if (*hp == '<') /* begin <address> */
state = INSIDE_BRACKETS;
tp = address;
}
- else if (!isspace(*hp)) /* ignore space */
+ else if (*hp != ',' && !isspace(*hp))
{
--hp;
state = BARE_ADDRESS;
break;
case BARE_ADDRESS: /* collecting address without delimiters */
- if (HEADER_END(hp)) /* end of bare address */
- {
- if (tp > address)
- {
- while (isspace(*--tp))
- continue;
- *++tp = '\0';
- state = ENDIT_ALL;
- return(tp = address);
- }
- }
- else if (*hp == '\\') /* handle RFC822 escaping */
- {
- *tp++ = *hp++; /* take the escape */
- *tp++ = *hp; /* take following char */
- }
- else if (*hp == ',') /* end of address */
+ if (*hp == ',') /* end of address */
{
if (tp > address)
{
return(tp = address);
}
}
+ else if (*hp == '(') /* beginning of comment */
+ {
+ parendepth = 1;
+ oldstate = BARE_ADDRESS;
+ state = INSIDE_PARENS;
+ }
else if (*hp == '<') /* beginning of real address */
{
state = INSIDE_BRACKETS;
tp = address;
}
- else /* just take it */
+ else if (!isspace(*hp)) /* just take it, ignoring whitespace */
*tp++ = *hp;
break;
case INSIDE_DQUOTE: /* we're in a quoted string, copy verbatim */
- if (HEADER_END(hp)) /* premature end of string */
- {
- state = ENDIT_ALL;
- return(NULL);
- }
- else if (*hp == '\\') /* handle RFC822 escaping */
- {
- *tp++ = *hp++; /* take the escape */
- *tp++ = *hp; /* take following char */
- }
- else if (*hp != '"')
+ if (*hp != '"')
*tp++ = *hp;
else
{
break;
case INSIDE_PARENS: /* we're in a parenthesized comment, ignore */
- if (HEADER_END(hp)) /* end of line, just bomb out */
- {
- state = ENDIT_ALL;
- return(NULL);
- }
- else if (*hp == '\\') /* handle RFC822 escaping */
- {
- *tp++ = *hp++; /* take the escape */
- *tp++ = *hp; /* take following char */
- }
- else if (*hp == '(')
+ if (*hp == '(')
++parendepth;
else if (*hp == ')')
--parendepth;
if (parendepth == 0)
- state = SKIP_JUNK;
+ state = oldstate;
break;
case INSIDE_BRACKETS: /* possible <>-enclosed address */
- if (HEADER_END(hp)) /* end of line, just bomb out */
- {
- state = ENDIT_ALL;
- return(NULL);
- }
- else if (*hp == '\\') /* handle RFC822 escaping */
- {
- *tp++ = *hp++; /* take the escape */
- *tp++ = *hp; /* take following char */
- }
- else if (*hp == '>') /* end of address */
+ if (*hp == '>') /* end of address */
{
*tp++ = '\0';
state = SKIP_JUNK;
else /* just copy address */
*tp++ = *hp;
break;
-
- case ENDIT_ALL: /* after last address */
- return(NULL);
- break;
}
}
}
#ifdef TESTMAIN
+static void parsebuf(char *longbuf, int reply) /* test driver: run one accumulated header through reply_hack() or nxtaddr() */
+{
+ char *cp;
+
+ if (reply) /* nonzero: show the rewritten header instead of listing addresses */
+ {
+ reply_hack(longbuf, "HOSTNAME.NET"); /* NOTE(review): return value ignored; safe only while TESTMAIN compiles out the realloc */
+ printf("Rewritten buffer: %s", longbuf);
+ }
+ else
+ if ((cp = nxtaddr(longbuf)) != (char *)NULL) /* first call hands over the header */
+ do {
+ printf("\t-> \"%s\"\n", cp);
+ } while
+ ((cp = nxtaddr((char *)NULL)) != (char *)NULL); /* presumably NULL means: continue with the same header */
+}
+
+
+
main(int argc, char *argv[])
{
- char buf[MSGBUFSIZE], *cp;
- int reply = (argc > 1 && !strcmp(argv[1], "-r"));
+ char buf[MSGBUFSIZE], longbuf[BUFSIZ]; /* NOTE(review): longbuf is never initialised before the strcat() and longbuf[0] tests below -- confirm; looks like it needs longbuf[0] = 0 */
+ int ch, reply;
+
+ verbose = reply = FALSE;
+ while ((ch = getopt(argc, argv, "rv")) != EOF)
+ switch(ch)
+ {
+ case 'r': /* -r: exercise reply_hack() rather than nxtaddr() */
+ reply = TRUE;
+ break;
+
+ case 'v': /* -v: echo each header and trace the state machines */
+ verbose = TRUE;
+ break;
+ }
while (fgets(buf, sizeof(buf)-1, stdin))
{
- if (strncmp("From: ", buf, 6)
- && strncmp("To: ", buf, 4)
- && strncmp("Reply-", buf, 6)
- && strncmp("Cc: ", buf, 4)
- && strncmp("Bcc: ", buf, 5))
- continue;
- else
+ if (buf[0] == ' ' || buf[0] == '\t') /* continuation line: append it to the header being accumulated */
+ strcat(longbuf, buf); /* NOTE(review): no bounds check against sizeof(longbuf) */
+ else if (!strncasecmp("From: ", buf, 6)
+ || !strncasecmp("To: ", buf, 4)
+ || !strncasecmp("Reply-", buf, 6)
+ || !strncasecmp("Cc: ", buf, 4)
+ || !strncasecmp("Bcc: ", buf, 5))
+ strcpy(longbuf, buf); /* start of an address header. NOTE(review): a header already pending in longbuf is overwritten here without being parsed -- confirm */
+ else if (longbuf[0]) /* any other line ends the pending header: process it */
{
- fputs(buf, stdout);
- if (reply)
- {
- reply_hack(buf, "HOSTNAME.NET");
- printf("Rewritten buffer: %s", buf);
- }
- else
- if ((cp = nxtaddr(buf)) != (char *)NULL)
- do {
- printf("\t%s\n", cp);
- } while
- ((cp = nxtaddr((char *)NULL)) != (char *)NULL);
+ if (verbose)
+ fputs(longbuf, stdout);
+ parsebuf(longbuf, reply);
+ longbuf[0] = '\0'; /* mark the accumulator empty */
}
-
+ }
+ if (longbuf[0]) /* flush a header still pending at end of input */
+ {
+ if (verbose)
+ fputs(longbuf, stdout);
+ parsebuf(longbuf, reply);
}
}
#endif /* TESTMAIN */