1 /*****************************************************************************
4 rfc822.c -- code for slicing and dicing RFC822 mail headers
7 nxtaddr() -- parse the next address out of an RFC822 header
8 reply_hack() -- append hostname to local header addresses
11 How to parse RFC822 headers in C. This is not a fully conformant
12 implementation of RFC822 or RFC2822, but it has been in production use
13 in a widely-deployed MTA (fetchmail) since 1996 without complaints.
14 Really perverse combinations of quoting and commenting could break it.
17 Eric S. Raymond <esr@thyrsus.com>, 1997. This source code example
18 is part of fetchmail and the Unix Cookbook, and is released under the
19 MIT license. Compile with -DMAIN to build the demonstrator.
21 ******************************************************************************/
23 #define _XOPEN_SOURCE 600
24 #define __BSD_VISIBLE 1
27 #include "fetchmail.h"
42 const char *program_name = "rfc822";
50 #define HEADER_END(p) ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))
52 #define BEFORE_EOL(s) (strcspn((s), "\r\n"))
55 char *buf /* header to be hacked */,
56 const char *host /* server hostname */,
58 /* hack message headers so replies will work properly */
60 char *from, *cp, last_nws = '\0', *parens_from = NULL;
61 int parendepth, state, has_bare_name_part, has_host_part;
66 if (strncasecmp("From:", buf, 5)
67 && strncasecmp("To:", buf, 3)
68 && strncasecmp("Reply-To:", buf, 9)
69 && strncasecmp("Return-Path:", buf, 12)
70 && strncasecmp("Cc:", buf, 3)
71 && strncasecmp("Bcc:", buf, 4)
72 && strncasecmp("Resent-From:", buf, 12)
73 && strncasecmp("Resent-To:", buf, 10)
74 && strncasecmp("Resent-Cc:", buf, 10)
75 && strncasecmp("Resent-Bcc:", buf, 11)
76 && strncasecmp("Apparently-From:", buf, 16)
77 && strncasecmp("Apparently-To:", buf, 14)
78 && strncasecmp("Sender:", buf, 7)
79 && strncasecmp("Resent-Sender:", buf, 14)
85 if (outlevel >= O_DEBUG) {
86 report_build(stdout, GT_("About to rewrite %s...\n"), (cp = sdump(buf, BEFORE_EOL(buf))));
90 /* make room to hack the address; buf must be malloced */
91 for (cp = buf; *cp; cp++)
92 if (*cp == ',' || isspace((unsigned char)*cp))
94 buf = (char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1);
98 * This is going to foo up on some ill-formed addresses.
99 * Note that we don't rewrite the fake address <> in order to
100 * avoid screwing up bounce suppression with a null Return-Path.
103 parendepth = state = 0;
104 has_host_part = has_bare_name_part = FALSE;
105 for (from = buf; *from; from++)
110 printf("state %d: %s", state, buf);
111 printf("%*s^\n", (int)(from - buf + 10), " ");
118 else if (*from == ')')
122 if (!parendepth && !has_host_part)
125 case 0: /* before header colon */
130 case 1: /* we've seen the colon, we're looking for addresses */
131 if (!isspace((unsigned char)*from))
135 else if (*from == '@' || *from == '!')
136 has_host_part = TRUE;
137 else if (*from == '"')
140 * Not expanding on last non-WS == ';' deals with groupnames,
141 * an obscure misfeature described in sections
142 * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
144 else if ((*from == ',' || HEADER_END(from))
145 && has_bare_name_part
155 while (isspace((unsigned char)*from) || (*from == ','))
158 hostlen = strlen(host);
159 for (cp = from + strlen(from); cp >= from; --cp)
162 memcpy(from, host, hostlen);
163 from = p + hostlen + 1;
164 has_host_part = TRUE;
166 else if (from[1] == '('
167 && has_bare_name_part
169 && last_nws != ';' && last_nws != ')')
173 else if (!isspace((unsigned char)*from))
174 has_bare_name_part = TRUE;
177 case 2: /* we're in a string */
184 for (bp = from - 1; *bp == '\\'; bp--)
191 case 3: /* we're in a <>-enclosed address */
192 if (*from == '@' || *from == '!')
193 has_host_part = TRUE;
194 else if (*from == '>' && (from > buf && from[-1] != '<'))
201 hostlen = strlen(host);
202 for (cp = from + strlen(from); cp >= from; --cp)
205 memcpy(from, host, hostlen);
207 has_host_part = TRUE;
214 * If we passed a comma, reset everything.
216 if ((from > buf && from[-1] == ',') && !parendepth) {
217 has_host_part = has_bare_name_part = FALSE;
223 if (outlevel >= O_DEBUG) {
224 report_complete(stdout, GT_("...rewritten version is %s.\n"),
225 (cp = sdump(buf, BEFORE_EOL(buf))));
230 *length = strlen(buf);
234 char *nxtaddr(const char *hdr /* header to be parsed, NUL to continue previous hdr */)
235 /* parse addresses in succession out of a specified RFC822 header */
237 static char address[BUFSIZ];
239 static const char *hp;
240 static int state, oldstate;
242 static const char *orighdr;
246 #define START_HDR 0 /* before header colon */
247 #define SKIP_JUNK 1 /* skip whitespace, \n, and junk */
248 #define BARE_ADDRESS 2 /* collecting address without delimiters */
249 #define INSIDE_DQUOTE 3 /* inside double quotes */
250 #define INSIDE_PARENS 4 /* inside parentheses */
251 #define INSIDE_BRACKETS 5 /* inside bracketed address */
252 #define ENDIT_ALL 6 /* after last address */
254 #define NEXTTP() ((tp < sizeof(address)-1) ? tp++ : tp)
266 if (!hp) return NULL;
273 printf("state %d: %s", state, orighdr);
274 printf("%*s^\n", (int)(hp - orighdr + 10), " ");
278 if (state == ENDIT_ALL) /* after last address */
280 else if (HEADER_END(hp))
285 while (tp > 0 && isspace((unsigned char)address[tp - 1]))
293 else if (*hp == '\\') /* handle RFC822 escaping */
295 if (state != INSIDE_PARENS)
297 address[NEXTTP()] = *hp++; /* take the escape */
298 address[NEXTTP()] = *hp; /* take following unsigned char */
303 case START_HDR: /* before header colon */
308 case SKIP_JUNK: /* looking for address start */
309 if (*hp == '"') /* quoted string */
311 oldstate = SKIP_JUNK;
312 state = INSIDE_DQUOTE;
313 address[NEXTTP()] = *hp;
315 else if (*hp == '(') /* address comment -- ignore */
318 oldstate = SKIP_JUNK;
319 state = INSIDE_PARENS;
321 else if (*hp == '<') /* begin <address> */
323 state = INSIDE_BRACKETS;
326 else if (*hp != ',' && !isspace((unsigned char)*hp))
329 state = BARE_ADDRESS;
333 case BARE_ADDRESS: /* collecting address without delimiters */
334 if (*hp == ',') /* end of address */
338 address[NEXTTP()] = '\0';
344 else if (*hp == '(') /* beginning of comment */
347 oldstate = BARE_ADDRESS;
348 state = INSIDE_PARENS;
350 else if (*hp == '<') /* beginning of real address */
352 state = INSIDE_BRACKETS;
355 else if (*hp == '"') /* quoted word, copy verbatim */
358 state = INSIDE_DQUOTE;
359 address[NEXTTP()] = *hp;
361 else if (!isspace((unsigned char)*hp)) /* just take it, ignoring whitespace */
362 address[NEXTTP()] = *hp;
365 case INSIDE_DQUOTE: /* we're in a quoted string, copy verbatim */
366 address[NEXTTP()] = *hp;
371 case INSIDE_PARENS: /* we're in a parenthesized comment, ignore */
380 case INSIDE_BRACKETS: /* possible <>-enclosed address */
381 if (*hp == '>') /* end of address */
383 address[NEXTTP()] = '\0';
389 else if (*hp == '<') /* nested <> */
391 else if (*hp == '"') /* quoted address */
393 address[NEXTTP()] = *hp;
394 oldstate = INSIDE_BRACKETS;
395 state = INSIDE_DQUOTE;
397 else /* just copy address */
398 address[NEXTTP()] = *hp;
407 static void parsebuf(char *longbuf, int reply)
414 reply_hack(longbuf, "HOSTNAME.NET", &dummy);
415 printf("Rewritten buffer: %s", (char *)longbuf);
418 if ((cp = nxtaddr(longbuf)) != (char *)NULL)
420 printf("\t-> \"%s\"\n", (char *)cp);
422 ((cp = nxtaddr((char *)NULL)) != (char *)NULL);
427 int main(int argc, char *argv[])
429 char buf[BUFSIZ], longbuf[BUFSIZ];
432 verbose = reply = FALSE;
433 while ((ch = getopt(argc, argv, "rv")) != EOF)
447 while (fgets(buf, sizeof(buf)-1, stdin))
449 if (buf[0] == ' ' || buf[0] == '\t')
450 strlcat(longbuf, buf, sizeof(longbuf));
451 else if (!strncasecmp("From: ", buf, 6)
452 || !strncasecmp("To: ", buf, 4)
453 || !strncasecmp("Reply-", buf, 6)
454 || !strncasecmp("Cc: ", buf, 4)
455 || !strncasecmp("Bcc: ", buf, 5))
456 strlcpy(longbuf, buf, sizeof(longbuf));
460 fputs(longbuf, stdout);
461 parsebuf(longbuf, reply);
468 fputs(longbuf, stdout);
469 parsebuf(longbuf, reply);