]> Pileus Git - ~andy/fetchmail/blob - rfc822.c
6ac353cb2de40add60e2e6e9c13f283e3dea097d
[~andy/fetchmail] / rfc822.c
1 /*****************************************************************************
2
3 NAME:
4    rfc822.c -- code for slicing and dicing RFC822 mail headers
5
6 ENTRY POINTS:
7    nxtaddr() -- parse the next address out of an RFC822 header
8    reply_hack() -- append hostname to local header addresses 
9
10 THEORY:
11    How to parse RFC822 headers in C. This is not a fully conformant
12 implementation of RFC822 or RFC2822, but it has been in production use
13 in a widely-deployed MTA (fetchmail) since 1996 without complaints.
14 Really perverse combinations of quoting and commenting could break it.
15
16 AUTHOR:
17    Eric S. Raymond <esr@thyrsus.com>, 1997.  This source code example
18 is part of fetchmail and the Unix Cookbook, and is released under the
19 MIT license.  Compile with -DMAIN to build the demonstrator.
20
21 ******************************************************************************/
22
23 #define _XOPEN_SOURCE 600
24
25 #include "config.h"
26
27 #include  <stdio.h>
28 #include  <ctype.h>
29 #include  <string.h>
30 #include  <strings.h>
31 #include  <stdlib.h>
32
33 #include "fetchmail.h"
34 #include "sdump.h"
35
36 #ifndef MAIN
37 #include "gettext.h"
38 #else
39 #include  <unistd.h>
40 static int verbose;
41 const char *program_name = "rfc822";
42 #endif /* MAIN */
43
44 #ifndef TRUE
45 #define TRUE 1
46 #define FALSE 0
47 #endif
48
/* True when p points at a newline that is NOT followed by a space or tab,
 * i.e. a line end that is not continued by a folded (indented) line. */
49 #define HEADER_END(p)   ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))
50
/* Number of characters in s that precede the first CR or LF (or the
 * terminating NUL when s contains neither). */
51 #define BEFORE_EOL(s)   (strcspn((s), "\r\n"))
52
53 char *reply_hack(
54         char *buf               /* header to be hacked */,
55         const char *host        /* server hostname */,
56         size_t *length)
57 /* hack message headers so replies will work properly */
58 {
59     char *from, *cp, last_nws = '\0', *parens_from = NULL;
60     int parendepth, state, has_bare_name_part, has_host_part;
61 #ifndef MAIN
62     int addresscount = 1;
63 #endif /* MAIN */
64
65     if (strncasecmp("From:", buf, 5)
66         && strncasecmp("To:", buf, 3)
67         && strncasecmp("Reply-To:", buf, 9)
68         && strncasecmp("Return-Path:", buf, 12)
69         && strncasecmp("Cc:", buf, 3)
70         && strncasecmp("Bcc:", buf, 4)
71         && strncasecmp("Resent-From:", buf, 12)
72         && strncasecmp("Resent-To:", buf, 10)
73         && strncasecmp("Resent-Cc:", buf, 10)
74         && strncasecmp("Resent-Bcc:", buf, 11)
75         && strncasecmp("Apparently-From:", buf, 16)
76         && strncasecmp("Apparently-To:", buf, 14)
77         && strncasecmp("Sender:", buf, 7)
78         && strncasecmp("Resent-Sender:", buf, 14)
79        ) {
80         return(buf);
81     }
82
83 #ifndef MAIN
84     if (outlevel >= O_DEBUG) {
85         report_build(stdout, GT_("About to rewrite %s...\n"), (cp = sdump(buf, BEFORE_EOL(buf))));
86         xfree(cp);
87     }
88
89     /* make room to hack the address; buf must be malloced */
90     for (cp = buf; *cp; cp++)
91         if (*cp == ',' || isspace((unsigned char)*cp))
92             addresscount++;
93     buf = (char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1);
94 #endif /* MAIN */
95
96     /*
97      * This is going to foo up on some ill-formed addresses.
98      * Note that we don't rewrite the fake address <> in order to
99      * avoid screwing up bounce suppression with a null Return-Path.
100      */
101
102     parendepth = state = 0;
103     has_host_part = has_bare_name_part = FALSE;
104     for (from = buf; *from; from++)
105     {
106 #ifdef MAIN
107         if (verbose)
108         {
109             printf("state %d: %s", state, buf);
110             printf("%*s^\n", (int)(from - buf + 10), " ");
111         }
112 #endif /* MAIN */
113         if (state != 2)
114         {
115             if (*from == '(')
116                 ++parendepth;
117             else if (*from == ')')
118                 --parendepth;
119         }
120
121         if (!parendepth && !has_host_part)
122             switch (state)
123             {
124             case 0:     /* before header colon */
125                 if (*from == ':')
126                     state = 1;
127                 break;
128
129             case 1:     /* we've seen the colon, we're looking for addresses */
130                 if (!isspace((unsigned char)*from))
131                     last_nws = *from;
132                 if (*from == '<')
133                     state = 3;
134                 else if (*from == '@' || *from == '!')
135                     has_host_part = TRUE;
136                 else if (*from == '"')
137                     state = 2;
138                 /*
139                  * Not expanding on last non-WS == ';' deals with groupnames,
140                  * an obscure misfeature described in sections
141                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
142                  */
143                 else if ((*from == ',' || HEADER_END(from))
144                          && has_bare_name_part
145                          && !has_host_part
146                          && last_nws != ';')
147                 {
148                     int hostlen;
149                     char *p;
150
151                     p = from;
152                     if (parens_from)
153                         from = parens_from;
154                     while (isspace((unsigned char)*from) || (*from == ','))
155                         --from;
156                     from++;
157                     hostlen = strlen(host);
158                     for (cp = from + strlen(from); cp >= from; --cp)
159                         cp[hostlen+1] = *cp;
160                     *from++ = '@';
161                     memcpy(from, host, hostlen);
162                     from = p + hostlen + 1;
163                     has_host_part = TRUE;
164                 } 
165                 else if (from[1] == '('
166                          && has_bare_name_part
167                          && !has_host_part
168                          && last_nws != ';' && last_nws != ')')
169                 {
170                     parens_from = from;
171                 } 
172                 else if (!isspace((unsigned char)*from))
173                     has_bare_name_part = TRUE;
174                 break;
175
176             case 2:     /* we're in a string */
177                 if (*from == '"')
178                 {
179                     char        *bp;
180                     int         bscount;
181
182                     bscount = 0;
183                     for (bp = from - 1; *bp == '\\'; bp--)
184                         bscount++;
185                     if (!(bscount % 2))
186                         state = 1;
187                 }
188                 break;
189
190             case 3:     /* we're in a <>-enclosed address */
191                 if (*from == '@' || *from == '!')
192                     has_host_part = TRUE;
193                 else if (*from == '>' && (from > buf && from[-1] != '<'))
194                 {
195                     state = 1;
196                     if (!has_host_part)
197                     {
198                         int hostlen;
199
200                         hostlen = strlen(host);
201                         for (cp = from + strlen(from); cp >= from; --cp)
202                             cp[hostlen+1] = *cp;
203                         *from++ = '@';
204                         memcpy(from, host, hostlen);
205                         from += hostlen;
206                         has_host_part = TRUE;
207                     }
208                 }
209                 break;
210             }
211
212         /*
213          * If we passed a comma, reset everything.
214          */
215         if ((from > buf && from[-1] == ',') && !parendepth) {
216           has_host_part = has_bare_name_part = FALSE;
217           parens_from = NULL;
218         }
219     }
220
221 #ifndef MAIN
222     if (outlevel >= O_DEBUG) {
223         report_complete(stdout, GT_("...rewritten version is %s.\n"),
224                         (cp = sdump(buf, BEFORE_EOL(buf))));
225         xfree(cp)
226     }
227
228 #endif /* MAIN */
229     *length = strlen(buf);
230     return(buf);
231 }
232
/*
 * nxtaddr -- return the addresses of an RFC822 header one call at a time.
 *
 * hdr - the header to parse on the first call; NULL on later calls to
 *       continue with the header given previously.
 *
 * Returns a pointer to a static buffer holding the next address, or NULL
 * when there are no more addresses (or no header has been supplied yet).
 * The buffer is overwritten by the next call.  Addresses that do not fit
 * are truncated: once the buffer is full NEXTTP() stops advancing, so later
 * characters keep overwriting the last slot.
 *
 * The parse position and state live in static variables, so only one header
 * can be iterated at a time.
 */
char *nxtaddr(const char *hdr /* header to be parsed, NUL to continue previous hdr */)
/* parse addresses in succession out of a specified RFC822 header */
{
    static char address[BUFSIZ];
    static size_t tp;
    static const char *hp;
    static int  state, oldstate;
#ifdef MAIN
    static const char *orighdr;
#endif /* MAIN */
    int parendepth = 0;

#define START_HDR       0       /* before header colon */
#define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
#define BARE_ADDRESS    2       /* collecting address without delimiters */
#define INSIDE_DQUOTE   3       /* inside double quotes */
#define INSIDE_PARENS   4       /* inside parentheses */
#define INSIDE_BRACKETS 5       /* inside bracketed address */
#define ENDIT_ALL       6       /* after last address */

/* index of the next free slot in address[]; never moves past the last one */
#define NEXTTP()        ((tp < sizeof(address)-1) ? tp++ : tp)

    if (hdr)
    {
        hp = hdr;
        state = START_HDR;
#ifdef MAIN
        orighdr = hdr;
#endif /* MAIN */
        tp = 0;
    }

    if (!hp) return NULL;

    for (; *hp; hp++)
    {
#ifdef MAIN
        if (verbose)
        {
            printf("state %d: %s", state, orighdr);
            printf("%*s^\n", (int)(hp - orighdr + 10), " ");
        }
#endif /* MAIN */

        if (state == ENDIT_ALL)         /* after last address */
            return(NULL);
        else if (HEADER_END(hp))
        {
            /* hp is left on the newline; the next call sees ENDIT_ALL */
            state = ENDIT_ALL;
            if (tp)
            {
                /* hand out the pending address without trailing whitespace */
                while (tp > 0 && isspace((unsigned char)address[tp - 1]))
                    tp--;
                address[tp] = '\0';
                tp = 0;
                return (address);
            }
            return(NULL);
        }
        else if (*hp == '\\')           /* handle RFC822 escaping */
        {
            if (state != INSIDE_PARENS)
            {
                address[NEXTTP()] = *hp;        /* take the escape */
                /*
                 * Take the escaped character only if there is one.  When the
                 * backslash is the last character of the string, stepping
                 * onto the NUL here would let the loop's hp++ run past the
                 * end of the header.
                 */
                if (hp[1] != '\0')
                    address[NEXTTP()] = *++hp;  /* take following unsigned char */
            }
        }
        else switch (state)
        {
        case START_HDR:   /* before header colon */
            if (*hp == ':')
                state = SKIP_JUNK;
            break;

        case SKIP_JUNK:         /* looking for address start */
            if (*hp == '"')     /* quoted string */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')        /* address comment -- ignore */
            {
                parendepth = 1;
                oldstate = SKIP_JUNK;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* begin <address> */
            {
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp != ',' && !isspace((unsigned char)*hp))
            {
                /* back up so BARE_ADDRESS sees this character again */
                --hp;
                state = BARE_ADDRESS;
            }
            break;

        case BARE_ADDRESS:      /* collecting address without delimiters */
            if (*hp == ',')     /* end of address */
            {
                if (tp)
                {
                    address[NEXTTP()] = '\0';
                    state = SKIP_JUNK;
                    tp = 0;
                    return(address);
                }
            }
            else if (*hp == '(')        /* beginning of comment */
            {
                parendepth = 1;
                oldstate = BARE_ADDRESS;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* beginning of real address */
            {
                /* whatever was collected so far was a display name; drop it */
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp == '"')        /* quoted word, copy verbatim */
            {
                oldstate = state;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (!isspace((unsigned char)*hp))      /* just take it, ignoring whitespace */
                address[NEXTTP()] = *hp;
            break;

        case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
            address[NEXTTP()] = *hp;
            if (*hp == '"')
                state = oldstate;
            break;

        case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
            if (*hp == '(')
                ++parendepth;
            else if (*hp == ')')
                --parendepth;
            if (parendepth == 0)
                state = oldstate;
            break;

        case INSIDE_BRACKETS:   /* possible <>-enclosed address */
            if (*hp == '>')     /* end of address */
            {
                address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                /* step over the '>' ourselves, since returning skips the loop's hp++ */
                ++hp;
                tp = 0;
                return(address);
            }
            else if (*hp == '<')        /* nested <> */
                tp = 0;
            else if (*hp == '"')        /* quoted address */
            {
                address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else                        /* just copy address */
                address[NEXTTP()] = *hp;
            break;

        default:
            break;
        }
    }

    return(NULL);
}
404
405 #ifdef MAIN
/*
 * Run one collected header through the demonstrator.
 *
 * With `reply` set, the header is rewritten by reply_hack() and printed;
 * otherwise every address nxtaddr() extracts from it is printed on its own
 * line.
 */
static void parsebuf(char *longbuf, int reply)
{
    size_t      unused_length;
    char        *addr;

    if (reply)
    {
        reply_hack(longbuf, "HOSTNAME.NET", &unused_length);
        printf("Rewritten buffer: %s", (char *)longbuf);
        return;
    }

    /* the first call passes the header, later calls continue with NULL */
    for (addr = nxtaddr(longbuf); addr != (char *)NULL; addr = nxtaddr((char *)NULL))
        printf("\t-> \"%s\"\n", (char *)addr);
}
423
424
425
426 int main(int argc, char *argv[])
427 {
428     char        buf[BUFSIZ], longbuf[BUFSIZ];
429     int         ch, reply;
430     
431     verbose = reply = FALSE;
432     while ((ch = getopt(argc, argv, "rv")) != EOF)
433         switch(ch)
434         {
435         case 'r':
436             reply = TRUE;
437             break;
438
439         case 'v':
440             verbose = TRUE;
441             break;
442         }
443
444     longbuf[0] = '\0';
445
446     while (fgets(buf, sizeof(buf)-1, stdin))
447     {
448         if (buf[0] == ' ' || buf[0] == '\t')
449             strlcat(longbuf, buf, sizeof(longbuf));
450         else if (!strncasecmp("From: ", buf, 6)
451                     || !strncasecmp("To: ", buf, 4)
452                     || !strncasecmp("Reply-", buf, 6)
453                     || !strncasecmp("Cc: ", buf, 4)
454                     || !strncasecmp("Bcc: ", buf, 5))
455             strlcpy(longbuf, buf, sizeof(longbuf));
456         else if (longbuf[0])
457         {
458             if (verbose)
459                 fputs(longbuf, stdout);
460             parsebuf(longbuf, reply);
461             longbuf[0] = '\0';
462         }
463     }
464     if (longbuf[0])
465     {
466         if (verbose)
467             fputs(longbuf, stdout);
468         parsebuf(longbuf, reply);
469     }
470     exit(0);
471 }
472 #endif /* MAIN */
473
474 /* rfc822.c end */