1 /*****************************************************************************
4 rfc822.c -- code for slicing and dicing RFC822 mail headers
7 nxtaddr() -- parse the next address out of an RFC822 header
8 reply_hack() -- append hostname to local header addresses
11 How to parse RFC822 headers in C. This is not a fully conformant
12 implementation of RFC822 or RFC2822, but it has been in production use
13 in a widely-deployed MTA (fetchmail) since 1996 without complaints.
14 Really perverse combinations of quoting and commenting could break it.
17 Eric S. Raymond <esr@thyrsus.com>, 1997. This source code example
18 is part of fetchmail and the Unix Cookbook, and is released under the
19 MIT license. Compile with -DMAIN to build the demonstrator.
21 ******************************************************************************/
28 #include "fetchmail.h"
/*
 * Program name string.  It is not referenced anywhere in the visible
 * code; presumably shared reporting code reads it when this file is
 * built as the -DMAIN demonstrator (TODO confirm).
 */
32 char *program_name = "rfc822";
/*
 * HEADER_END(p) -- true when p points at a newline that is NOT followed
 * by a space or a tab, i.e. a newline that does not introduce a
 * continuation line.
 *
 * The macro reads p[1] whenever p[0] is a newline, so p must point into
 * a NUL-terminated buffer.  The argument is expanded up to three times;
 * do not pass an expression with side effects.
 */
40 #define HEADER_END(p) ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))
/*
 * reply_hack() -- rewrite one address-bearing header so that addresses
 * lacking a host part get the server hostname inserted.
 *
 * Parameters (K&R style):
 *   buf     header text to rewrite.  It is passed to xrealloc() below, so
 *           it must be heap-allocated and the caller must continue with
 *           the (possibly moved) buffer, not the pointer it passed in.
 *   host    hostname to insert.
 *   length  written through at the end with strlen() of the rewritten
 *           header (its declaration is not visible in this listing).
 *
 * Overview of the visible logic:
 *   1. The header name is compared, case-insensitively, against a fixed
 *      list of address headers.  strncasecmp() yields non-zero on a
 *      mismatch, so the && chain is true only when buf matches none of
 *      them.  The action taken in that case is not visible; presumably
 *      the header is returned untouched (TODO confirm).
 *   2. The buffer is grown to leave room for the insertions.
 *   3. A character-by-character scan runs a four-state machine:
 *        state 0  before the header colon
 *        state 1  after the colon, looking for addresses.  Seeing @ or !
 *                 marks the current address as having a host part.  A
 *                 comma or HEADER_END with a bare name seen (further
 *                 conditions are not visible) leads to the insertion
 *                 code: skip whitespace and commas, run a loop from the
 *                 end of the string down to the insertion point (body
 *                 not visible; presumably it shifts the tail to open a
 *                 gap), then memcpy() the hostname into place.
 *        state 2  inside a quoted string.  A backward scan over
 *                 consecutive backslashes precedes the current position;
 *                 presumably it decides whether a quote is escaped.
 *        state 3  inside a <>-enclosed address.  A closing > that is not
 *                 immediately preceded by < triggers the same insertion,
 *                 so the null address <> is left alone.
 *      After a comma outside parentheses both per-address flags are
 *      reset.
 *   4. *length receives the new string length.
 *
 * NOTE(review): this listing is a partial extract.  Braces, several
 * declarations (hostlen, addresscount, bp, and the p used in
 * from = p + hostlen + 1), loop bodies and the final return are not
 * visible, so the description above covers only what is shown.
 * NOTE(review): the debugging printf passes from - buf + 10 as the width
 * for %*s.  That expression has type ptrdiff_t but the width argument
 * must be an int; it needs an explicit cast.
 * NOTE(review): strlen() and strncasecmp() are handed unsigned char
 * pointers; they take plain char pointers, so casts are needed to avoid
 * pointer-signedness diagnostics.
 */
42 unsigned char *reply_hack(buf, host, length)
43 /* hack message headers so replies will work properly */
44 unsigned char *buf; /* header to be hacked */
45 const unsigned char *host; /* server hostname */
48 unsigned char *from, *cp, last_nws = '\0', *parens_from = NULL;
49 int parendepth, state, has_bare_name_part, has_host_part;
/* true only when buf starts with none of the address headers listed */
54 if (strncasecmp("From:", buf, 5)
55 && strncasecmp("To:", buf, 3)
56 && strncasecmp("Reply-To:", buf, 9)
57 && strncasecmp("Return-Path:", buf, 12)
58 && strncasecmp("Cc:", buf, 3)
59 && strncasecmp("Bcc:", buf, 4)
60 && strncasecmp("Resent-From:", buf, 12)
61 && strncasecmp("Resent-To:", buf, 10)
62 && strncasecmp("Resent-Cc:", buf, 10)
63 && strncasecmp("Resent-Bcc:", buf, 11)
64 && strncasecmp("Apparently-From:", buf, 16)
65 && strncasecmp("Apparently-To:", buf, 14)
66 && strncasecmp("Sender:", buf, 7)
67 && strncasecmp("Resent-Sender:", buf, 14)
/* debug trace of the header before rewriting */
73 if (outlevel >= O_DEBUG)
74 report_build(stdout, GT_("About to rewrite %s"), buf);
76 /* make room to hack the address; buf must be malloced */
/*
 * Every comma or whitespace character is treated as a possible address
 * boundary (the loop body is not visible; presumably it increments
 * addresscount).  The new size allows strlen(host) + 1 extra bytes per
 * counted boundary, plus one byte for the terminating NUL.
 */
77 for (cp = buf; *cp; cp++)
78 if (*cp == ',' || isspace(*cp))
80 buf = (unsigned char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1);
84 * This is going to foo up on some ill-formed addresses.
85 * Note that we don't rewrite the fake address <> in order to
86 * avoid screwing up bounce suppression with a null Return-Path.
89 parendepth = state = 0;
90 has_host_part = has_bare_name_part = FALSE;
91 for (from = buf; *from; from++)
96 printf("state %d: %s", state, buf);
97 printf("%*s^\n", from - buf + 10, " ");
104 else if (*from == ')')
108 if (!parendepth && !has_host_part)
111 case 0: /* before header colon */
116 case 1: /* we've seen the colon, we're looking for addresses */
121 else if (*from == '@' || *from == '!')
122 has_host_part = TRUE;
123 else if (*from == '"')
126 * Not expanding on last non-WS == ';' deals with groupnames,
127 * an obscure misfeature described in sections
128 * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
130 else if ((*from == ',' || HEADER_END(from))
131 && has_bare_name_part
141 while (isspace(*from) || (*from == ','))
144 hostlen = strlen(host);
145 for (cp = from + strlen(from); cp >= from; --cp)
148 memcpy(from, host, hostlen);
149 from = p + hostlen + 1;
150 has_host_part = TRUE;
152 else if (from[1] == '('
153 && has_bare_name_part
155 && last_nws != ';' && last_nws != ')')
159 else if (!isspace(*from))
160 has_bare_name_part = TRUE;
163 case 2: /* we're in a string */
170 for (bp = from - 1; *bp == '\\'; bp--)
177 case 3: /* we're in a <>-enclosed address */
178 if (*from == '@' || *from == '!')
179 has_host_part = TRUE;
180 else if (*from == '>' && (from > buf && from[-1] != '<'))
187 hostlen = strlen(host);
188 for (cp = from + strlen(from); cp >= from; --cp)
191 memcpy(from, host, hostlen);
193 has_host_part = TRUE;
200 * If we passed a comma, reset everything.
202 if ((from > buf && from[-1] == ',') && !parendepth) {
203 has_host_part = has_bare_name_part = FALSE;
209 if (outlevel >= O_DEBUG)
210 report_complete(stdout, GT_("Rewritten version is %s\n"), buf);
212 *length = strlen(buf);
/*
 * nxtaddr() -- parse addresses, one per call, out of an RFC822 header.
 *
 * Parameter (K&R style):
 *   hdr  header to parse, or a null pointer to continue with the header
 *        supplied on an earlier call.
 *
 * Return value: the visible code returns a null pointer on one path in
 * the end-of-header handling.  Callers in this file treat any other
 * result as a NUL-terminated string.  The address text is assembled in
 * the static buffer address, so the result presumably points there and
 * is overwritten by the next call (TODO confirm; the other return
 * statements are not visible).
 *
 * The parse position (hp), the current and saved states and the output
 * buffer are all static, so only one header can be walked at a time.
 *
 * NOTE(review): this listing is a partial extract.  Braces, the
 * declaration of tp, the main loop header and most state transitions
 * back out of INSIDE_DQUOTE and INSIDE_PARENS are not visible.
 */
216 unsigned char *nxtaddr(hdr)
217 /* parse addresses in succession out of a specified RFC822 header */
218 const unsigned char *hdr; /* header to be parsed, NUL to continue previous hdr */
/* output buffer; addresses longer than BUFSIZ - 1 bytes are truncated */
220 static unsigned char address[BUFSIZ];
/* current read position, kept between calls */
222 static const unsigned char *hp;
/* oldstate records the state to resume after a quoted string or comment */
223 static int state, oldstate;
/* start of the header being parsed; used by the debugging output below */
225 static const unsigned char *orighdr;
/* parser states */
229 #define START_HDR 0 /* before header colon */
230 #define SKIP_JUNK 1 /* skip whitespace, \n, and junk */
231 #define BARE_ADDRESS 2 /* collecting address without delimiters */
232 #define INSIDE_DQUOTE 3 /* inside double quotes */
233 #define INSIDE_PARENS 4 /* inside parentheses */
234 #define INSIDE_BRACKETS 5 /* inside bracketed address */
235 #define ENDIT_ALL 6 /* after last address */
/*
 * NEXTTP() yields the current write index and advances it, except that
 * once tp reaches sizeof(address) - 1 it stops advancing: further writes
 * all land on the last byte, which silently truncates the address
 * instead of overrunning the buffer.
 */
237 #define NEXTTP() ((tp < sizeof(address)-1) ? tp++ : tp)
/*
 * Debugging dump: the header, then a caret under the current position.
 * NOTE(review): the width argument for %*s must be an int, but
 * hp - orighdr + 10 has type ptrdiff_t; it needs an explicit cast.
 */
254 printf("state %d: %s", state, orighdr);
255 printf("%*s^\n", hp - orighdr + 10, " ");
259 if (state == ENDIT_ALL) /* after last address */
261 else if (HEADER_END(hp))
/*
 * Strip trailing whitespace from the collected address and terminate it.
 * NOTE(review): the pre-decrement would index before the buffer if tp
 * were 0 here; any guard is on lines that are not visible -- confirm.
 */
266 while (isspace(address[--tp]))
268 address[++tp] = '\0';
272 return((unsigned char *)NULL);
/* a backslash and the character after it are copied as a pair, except
 * inside a parenthesized comment */
274 else if (*hp == '\\') /* handle RFC822 escaping */
276 if (state != INSIDE_PARENS)
278 address[NEXTTP()] = *hp++; /* take the escape */
279 address[NEXTTP()] = *hp; /* take following unsigned char */
284 case START_HDR: /* before header colon */
289 case SKIP_JUNK: /* looking for address start */
290 if (*hp == '"') /* quoted string */
292 oldstate = SKIP_JUNK;
293 state = INSIDE_DQUOTE;
294 address[NEXTTP()] = *hp;
296 else if (*hp == '(') /* address comment -- ignore */
299 oldstate = SKIP_JUNK;
300 state = INSIDE_PARENS;
302 else if (*hp == '<') /* begin <address> */
304 state = INSIDE_BRACKETS;
/* any other character that is not a comma or whitespace starts a bare address */
307 else if (*hp != ',' && !isspace(*hp))
310 state = BARE_ADDRESS;
314 case BARE_ADDRESS: /* collecting address without delimiters */
315 if (*hp == ',') /* end of address */
319 address[NEXTTP()] = '\0';
325 else if (*hp == '(') /* beginning of comment */
328 oldstate = BARE_ADDRESS;
329 state = INSIDE_PARENS;
331 else if (*hp == '<') /* beginning of real address */
333 state = INSIDE_BRACKETS;
336 else if (*hp == '"') /* quoted word, copy verbatim */
339 state = INSIDE_DQUOTE;
340 address[NEXTTP()] = *hp;
342 else if (!isspace(*hp)) /* just take it, ignoring whitespace */
343 address[NEXTTP()] = *hp;
346 case INSIDE_DQUOTE: /* we're in a quoted string, copy verbatim */
348 address[NEXTTP()] = *hp;
351 address[NEXTTP()] = *hp;
356 case INSIDE_PARENS: /* we're in a parenthesized comment, ignore */
365 case INSIDE_BRACKETS: /* possible <>-enclosed address */
366 if (*hp == '>') /* end of address */
368 address[NEXTTP()] = '\0';
374 else if (*hp == '<') /* nested <> */
376 else if (*hp == '"') /* quoted address */
378 address[NEXTTP()] = *hp;
379 oldstate = INSIDE_BRACKETS;
380 state = INSIDE_DQUOTE;
382 else /* just copy address */
383 address[NEXTTP()] = *hp;
/*
 * parsebuf() -- demonstrator helper: run reply_hack() over a collected
 * header and print the result, then print every address that nxtaddr()
 * extracts from it.
 *
 *   longbuf  NUL-terminated header text.
 *   reply    presumably selects whether the reply_hack() step runs; the
 *            test on it is not visible in this listing (TODO confirm).
 *
 * NOTE(review): reply_hack() is defined in this file with three
 * parameters (buf, host, length) but is called here with two.
 * NOTE(review): reply_hack() reallocates its buffer with xrealloc() and
 * documents that buf must be malloced, yet main() passes a stack array
 * and the possibly moved buffer it hands back is discarded here.  The
 * rewritten text printed below may therefore not be the text that was
 * actually rewritten.
 */
392 static void parsebuf(unsigned char *longbuf, int reply)
398 reply_hack(longbuf, "HOSTNAME.NET");
399 printf("Rewritten buffer: %s", longbuf);
/* first call hands over the header; later calls pass a null pointer to
 * continue with the same header */
402 if ((cp = nxtaddr(longbuf)) != (unsigned char *)NULL)
404 printf("\t-> \"%s\"\n", cp);
406 ((cp = nxtaddr((unsigned char *)NULL)) != (unsigned char *)NULL);
/*
 * main() -- demonstrator entry point (built with -DMAIN per the file
 * header).  Reads header lines from standard input, folds continuation
 * lines into the preceding header, and hands each collected header to
 * parsebuf().
 *
 * Options r and v are accepted by getopt(); what each one sets is not
 * visible here, presumably reply and verbose (TODO confirm).
 *
 * NOTE(review): the definition has no return type.  Implicit int was
 * removed in C99; declare it as int main(...).
 * NOTE(review): getopt() signals the end of options with -1; comparing
 * against EOF only works where EOF happens to be -1.
 * NOTE(review): fgets() already reserves room for the terminating NUL,
 * so passing sizeof(buf)-1 just wastes one byte.
 * NOTE(review): strcat() into longbuf is unbounded.  A header with
 * enough continuation lines overflows the BUFSIZ array.  No
 * initialisation of longbuf is visible either, so a continuation line
 * arriving first would append to indeterminate contents -- confirm
 * against the missing lines.
 * NOTE(review): buf and longbuf are unsigned char arrays passed to
 * fgets(), strcat(), strcpy() and fputs(), which take plain char
 * pointers.
 */
411 main(int argc, char *argv[])
413 unsigned char buf[BUFSIZ], longbuf[BUFSIZ];
416 verbose = reply = FALSE;
417 while ((ch = getopt(argc, argv, "rv")) != EOF)
429 while (fgets(buf, sizeof(buf)-1, stdin))
/* a leading space or tab marks a continuation of the previous header */
431 if (buf[0] == ' ' || buf[0] == '\t')
432 strcat(longbuf, buf);
/* a recognised address header starts a fresh longbuf; both arrays are
 * BUFSIZ bytes, so this copy cannot overflow */
433 else if (!strncasecmp("From: ", buf, 6)
434 || !strncasecmp("To: ", buf, 4)
435 || !strncasecmp("Reply-", buf, 6)
436 || !strncasecmp("Cc: ", buf, 4)
437 || !strncasecmp("Bcc: ", buf, 5))
438 strcpy(longbuf, buf);
/* echo the collected header, then show how it parses */
442 fputs(longbuf, stdout);
443 parsebuf(longbuf, reply);
450 fputs(longbuf, stdout);
451 parsebuf(longbuf, reply);