2 * rfc822.c -- code for slicing and dicing RFC822 mail headers
4 * Copyright 1997 by Eric S. Raymond
5 * For license terms, see the file COPYING in this directory.
11 #if defined(STDC_HEADERS)
15 #include "fetchmail.h"
/*
 * HEADER_END(p): true when p points at a newline whose following character
 * is neither a space nor a tab, i.e. a newline that does not lead into a
 * line beginning with whitespace.  The argument is evaluated more than
 * once, so it must be free of side effects.
 */
17 #define HEADER_END(p) ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))
/*
 * reply_hack -- copy the host text into addresses of an address header
 * that have no host part of their own (has_host_part still false).
 *
 * buf:  the header text.  It is resized with xrealloc() below, so it has
 *       to be heap storage.
 * host: the text that memcpy() places into the header.
 *
 * The routine is declared to return char *.
 * NOTE(review): several statements of this routine (braces, increments,
 * the return statement) are not visible in this excerpt; the comments
 * below describe only what the visible lines establish.
 */
23 char *reply_hack(buf, host)
24 /* hack message headers so replies will work properly */
25 char *buf; /* header to be hacked */
26 const char *host; /* server hostname */
28 char *from, *cp, last_nws = '\0';
29 int parendepth, state, has_bare_name_part, has_host_part;
/*
 * strncasecmp() yields nonzero on a mismatch, so this condition holds only
 * when buf begins with none of the six address header names listed.
 * NOTE(review): the guarded body is not visible here -- presumably an
 * early exit that leaves other headers untouched; confirm.
 */
32 if (strncasecmp("From: ", buf, 6)
33 && strncasecmp("To: ", buf, 4)
34 && strncasecmp("Reply-To: ", buf, 10)
35 && strncasecmp("Return-Path: ", buf, 13)
36 && strncasecmp("Cc: ", buf, 4)
37 && strncasecmp("Bcc: ", buf, 5)) {
42 /* make room to hack the address; buf must be malloced */
/*
 * Scan the header for commas and whitespace characters.
 * NOTE(review): the statement controlled by the if is not visible;
 * addresscount, used in the xrealloc() call, is presumably incremented
 * there -- confirm.  The new size is the current length plus one copy of
 * host per counted unit plus one byte for the terminator.
 */
43 for (cp = buf; *cp; cp++)
44 if (*cp == ',' || isspace(*cp))
46 buf = (char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
50 * This is going to foo up on some ill-formed addresses. For example,
51 * "From: John Smith (Systems) <jsmith@domain>" will get rewritten as
52 * "From: John Smith@my.pop.server (Systems) <jsmith@domain>" because
53 * the state machine can't look ahead to the <> part past the comment
54 * and instead treats `John Smith' as a bareword address.
/* Begin in state 0 with no open parentheses and no address parts seen. */
57 parendepth = state = 0;
58 has_host_part = has_bare_name_part = FALSE;
/* Walk the (possibly reallocated) buffer one character at a time. */
59 for (from = buf; *from; from++)
/*
 * Trace output: the state and the whole buffer, then a caret line whose
 * width is derived from the offset from - buf.
 * NOTE(review): any conditional around these calls is not visible here.
 */
64 printf("state %d: %s", state, buf);
65 printf("%*s^\n", from - buf + 10, " ");
/*
 * Parenthesis tracking.  The state switch below is entered only with
 * parendepth equal to zero and no host part recorded yet.
 * NOTE(review): the branch for an opening parenthesis and the updates to
 * parendepth are not visible in this excerpt.
 */
71 else if (*from == ')')
74 if (!parendepth && !has_host_part)
77 case 0: /* before header colon */
82 case 1: /* we've seen the colon, we're looking for addresses */
87 else if (*from == '@')
89 else if (*from == '"')
92 * Not expanding on last non-WS == ';' deals with groupnames,
93 * an obscure misfeature described in sections
94 * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
/*
 * Candidate end of a bare address: a comma, the end of the header, or a
 * comment opening at the next character, provided the last non-whitespace
 * character was neither a semicolon nor a closing parenthesis.
 * NOTE(review): further terms of this condition are not visible here.
 */
96 else if ((*from == ',' || HEADER_END(from) || from[1] == '(')
99 && last_nws != ';' && last_nws != ')')
/* Loop over whitespace and commas at from; the loop body is not visible. */
103 while (isspace(*from) || (*from == ','))
/*
 * Insert host at from.  The loop visits every character from the
 * terminator back down to from; its body is not visible but presumably
 * moves each one up to open a gap -- confirm.  memcpy() then writes
 * hostlen bytes of host at from.
 */
106 hostlen = strlen(host);
107 for (cp = from + strlen(from); cp >= from; --cp)
110 memcpy(from, host, hostlen);
112 has_host_part = TRUE;
/* Any other non-whitespace character counts as part of a bare name. */
114 else if (!isspace(*from))
115 has_bare_name_part = TRUE;
118 case 2: /* we're in a string */
123 case 3: /* we're in a <>-enclosed address */
125 has_host_part = TRUE;
126 else if (*from == '>')
/*
 * Same insertion sequence as in state 1, here reached from the branch
 * for the closing angle bracket.
 * NOTE(review): the condition guarding this insertion is not visible.
 */
133 hostlen = strlen(host);
134 for (cp = from + strlen(from); cp >= from; --cp)
137 memcpy(from, host, hostlen);
139 has_host_part = TRUE;
146 * If we passed a comma, reset everything.
/*
 * A comma just behind from, outside any parentheses, starts a fresh
 * address, so both part flags are cleared.
 * NOTE(review): from[-1] is read here; confirm from can never equal buf
 * at this point.
 */
148 if (from[-1] == ',' && !parendepth) {
149 has_host_part = has_bare_name_part = FALSE;
/*
 * Address iterator over one RFC822 header.  Pass the header text to start
 * and a null pointer for each following address (see the hdr comment).
 * The read position, the parser state and the output buffer are all
 * static, so the text returned by one call is overwritten by the next
 * and the routine keeps only one parse in progress at a time.
 * Visible return statements yield either the address buffer or a null
 * pointer.
 * NOTE(review): the function header line is not visible in this excerpt;
 * the name nxtaddr is taken from the calls in parsebuf.
 */
157 /* parse addresses in succession out of a specified RFC822 header */
158 const char *hdr; /* header to be parsed, NULL to continue previous hdr */
/* tp is the write cursor into address; hp is the read cursor in the header. */
160 static char *tp, address[POPBUFSIZE+1];
161 static const char *hp;
/* oldstate remembers the state that was active when a nested one began. */
162 static int state, oldstate;
/* orighdr sits in a TESTMAIN-only section and feeds the trace output below. */
164 static const char *orighdr;
165 #endif /* TESTMAIN */
168 #define START_HDR 0 /* before header colon */
169 #define SKIP_JUNK 1 /* skip whitespace, \n, and junk */
170 #define BARE_ADDRESS 2 /* collecting address without delimiters */
171 #define INSIDE_DQUOTE 3 /* inside double quotes */
172 #define INSIDE_PARENS 4 /* inside parentheses */
173 #define INSIDE_BRACKETS 5 /* inside bracketed address */
174 #define ENDIT_ALL 6 /* after last address */
182 #endif /* TESTMAIN */
/*
 * Trace output: the state and the original header, then a caret line
 * whose width is derived from the offset hp - orighdr.
 */
191 printf("state %d: %s", state, orighdr);
192 printf("%*s^\n", hp - orighdr + 10, " ");
194 #endif /* TESTMAIN */
196 if (state == ENDIT_ALL) /* after last address */
/*
 * End of the header: step tp backwards over whitespace, then return the
 * address buffer if tp ended up past its start, or a null pointer if not.
 * NOTE(review): when nothing has been collected, the predecrement moves
 * tp before the start of address before reading; confirm the lines not
 * visible here guard against that.
 */
198 else if (HEADER_END(hp))
203 while (isspace(*--tp))
207 return(tp > address ? (tp = address) : (char *)NULL);
209 else if (*hp == '\\') /* handle RFC822 escaping */
/*
 * Outside a parenthesized comment the backslash and the character after
 * it are both copied to the output.
 * NOTE(review): braces are not visible; presumably both copies are
 * controlled by the if -- confirm.
 */
211 if (state != INSIDE_PARENS)
213 *tp++ = *hp++; /* take the escape */
214 *tp++ = *hp; /* take following char */
219 case START_HDR: /* before header colon */
224 case SKIP_JUNK: /* looking for address start */
225 if (*hp == '"') /* quoted string */
227 oldstate = SKIP_JUNK;
228 state = INSIDE_DQUOTE;
231 else if (*hp == '(') /* address comment -- ignore */
234 oldstate = SKIP_JUNK;
235 state = INSIDE_PARENS;
237 else if (*hp == '<') /* begin <address> */
239 state = INSIDE_BRACKETS;
/* Anything that is not a comma or whitespace begins a bare address. */
242 else if (*hp != ',' && !isspace(*hp))
245 state = BARE_ADDRESS;
249 case BARE_ADDRESS: /* collecting address without delimiters */
250 if (*hp == ',') /* end of address */
/* Rewind tp to the start of the buffer and hand the buffer back. */
256 return(tp = address);
259 else if (*hp == '(') /* beginning of comment */
262 oldstate = BARE_ADDRESS;
263 state = INSIDE_PARENS;
265 else if (*hp == '<') /* beginning of real address */
267 state = INSIDE_BRACKETS;
270 else if (!isspace(*hp)) /* just take it, ignoring whitespace */
274 case INSIDE_DQUOTE: /* we're in a quoted string, copy verbatim */
284 case INSIDE_PARENS: /* we're in a parenthesized comment, ignore */
293 case INSIDE_BRACKETS: /* possible <>-enclosed address */
294 if (*hp == '>') /* end of address */
/* Rewind tp to the start of the buffer and hand the buffer back. */
299 return(tp = address);
301 else if (*hp == '<') /* nested <> */
303 else if (*hp == '"') /* quoted address */
306 oldstate = INSIDE_BRACKETS;
307 state = INSIDE_DQUOTE;
309 else /* just copy address */
/*
 * Test-harness helper: runs one assembled header through reply_hack and
 * prints the rewritten text, then lists every address nxtaddr extracts.
 *
 * longbuf: the header text to process.
 * reply:   flag passed in by main.  NOTE(review): its use is not visible
 *          in this excerpt; presumably it gates the reply_hack call.
 *
 * NOTE(review): reply_hack resizes its buffer with xrealloc and its
 * result is not used here, while main declares longbuf as a local array;
 * confirm this is safe.
 */
319 static void parsebuf(char *longbuf, int reply)
325 reply_hack(longbuf, "HOSTNAME.NET");
326 printf("Rewritten buffer: %s", longbuf);
/* The first call passes the header; the later call passes a null pointer to continue. */
329 if ((cp = nxtaddr(longbuf)) != (char *)NULL)
331 printf("\t-> \"%s\"\n", cp);
333 ((cp = nxtaddr((char *)NULL)) != (char *)NULL);
/*
 * Test driver: reads lines from standard input, gathers each address
 * header together with its continuation lines in longbuf, echoes it and
 * passes it to parsebuf.  No return type is written, so the old implicit
 * int rule applies.
 */
338 main(int argc, char *argv[])
/*
 * NOTE(review): buf and longbuf have different size constants, and the
 * strcpy and strcat calls below are unbounded; confirm that a long line
 * or a run of continuation lines cannot overflow longbuf.
 */
340 char buf[MSGBUFSIZE], longbuf[BUFSIZ];
343 verbose = reply = FALSE;
/* Accepted option letters are r and v; presumably they set reply and verbose. */
344 while ((ch = getopt(argc, argv, "rv")) != EOF)
/* fgets already keeps one byte for the terminator, so the -1 is redundant. */
356 while (fgets(buf, sizeof(buf)-1, stdin))
/* A line starting with a space or tab is appended to the current header. */
358 if (buf[0] == ' ' || buf[0] == '\t')
359 strcat(longbuf, buf);
/*
 * A new address header replaces the contents of longbuf.  The Reply-
 * prefix matches any header name starting that way, and Return-Path is
 * not in this list although reply_hack accepts it.
 */
360 else if (!strncasecmp("From: ", buf, 6)
361 || !strncasecmp("To: ", buf, 4)
362 || !strncasecmp("Reply-", buf, 6)
363 || !strncasecmp("Cc: ", buf, 4)
364 || !strncasecmp("Bcc: ", buf, 5))
365 strcpy(longbuf, buf);
/*
 * Echo and parse the gathered header.  NOTE(review): the pair appears
 * twice; presumably once inside the read loop and once after it for the
 * final header -- the surrounding control flow is not visible here.
 */
369 fputs(longbuf, stdout);
370 parsebuf(longbuf, reply);
377 fputs(longbuf, stdout);
378 parsebuf(longbuf, reply);
381 #endif /* TESTMAIN */