]> Pileus Git - ~andy/fetchmail/blob - rfc822.c
99c78c0ef094541eac09930a3ec0adca073470da
[~andy/fetchmail] / rfc822.c
1 /*****************************************************************************
2
3 NAME:
4    rfc822.c -- code for slicing and dicing RFC822 mail headers
5
6 ENTRY POINTS:
7    nxtaddr() -- parse the next address out of an RFC822 header
8    reply_hack() -- append hostname to local header addresses 
9
10 THEORY:
11    How to parse RFC822 headers in C. This is not a fully conformant
12 implementation of RFC822 or RFC2822, but it has been in production use
13 in a widely-deployed MTA (fetchmail) since 1996 without complaints.
14 Really perverse combinations of quoting and commenting could break it.
15
16 AUTHOR:
17    Eric S. Raymond <esr@thyrsus.com>, 1997.  This source code example
18 is part of fetchmail and the Unix Cookbook, and is released under the
19 MIT license.  Compile with -DMAIN to build the demonstrator.
20
21 ******************************************************************************/
22 #include  <stdio.h>
23 #include  <ctype.h>
24 #include  <string.h>
25 #include  <stdlib.h>
26
27 #include "fetchmail.h"
28
29 #ifndef MAIN
30 #include "i18n.h"
31 #else
32 #include  <unistd.h>
33 static int verbose;
34 char *program_name = "rfc822";
35 #endif /* MAIN */
36
37 #ifndef TRUE
38 #define TRUE 1
39 #define FALSE 0
40 #endif
41
/*
 * True when P points at a newline that really terminates the header, i.e.
 * the character after it is neither a space nor a tab (which would mark a
 * folded continuation line).  P is evaluated more than once.
 */
#define HEADER_END(p)   ((p)[0] == '\n' && !((p)[1] == ' ' || (p)[1] == '\t'))
43
/* return TRUE if LINE starts with one of the address-bearing header names */
static int rfc822_is_address_header(const unsigned char *line)
{
    static const char *const names[] = {
        "From:", "To:", "Reply-To:", "Return-Path:", "Cc:", "Bcc:",
        "Resent-From:", "Resent-To:", "Resent-Cc:", "Resent-Bcc:",
        "Apparently-From:", "Apparently-To:", "Sender:", "Resent-Sender:",
    };
    size_t i;

    for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
        if (strncasecmp(names[i], (const char *)line, strlen(names[i])) == 0)
            return(TRUE);
    return(FALSE);
}

/* open a gap at AT and write "@" followed by HOST (HOSTLEN bytes) into it */
static void rfc822_insert_host(unsigned char *at,
                               const unsigned char *host, size_t hostlen)
{
    /* regions overlap, so memmove; the +1 carries the terminating NUL along */
    memmove(at + hostlen + 1, at, strlen((const char *)at) + 1);
    *at = '@';
    memcpy(at + 1, host, hostlen);
}

unsigned char *reply_hack(
        unsigned char *buf              /* header to be hacked */,
        const unsigned char *host       /* server hostname */,
        size_t *length)
/*
 * Hack message headers so replies will work properly: every address in an
 * address-bearing header that has no '@' or '!' host part gets "@host"
 * appended.
 *
 * buf    -- NUL-terminated header line.  Unless compiled with -DMAIN it is
 *           passed to xrealloc(), so it must be heap-allocated.  With -DMAIN
 *           the rewrite is done in place and the caller must supply enough
 *           spare room after the string.
 * host   -- NUL-terminated hostname to append.
 * length -- receives strlen() of the rewritten header.  It is NOT written
 *           when buf is not an address header.
 *
 * Returns the (possibly relocated) buffer; for headers that are not
 * rewritten this is buf itself.
 */
{
    unsigned char *from, last_nws = '\0', *parens_from = NULL;
    int parendepth, state, has_bare_name_part, has_host_part;
    size_t hostlen;
#ifndef MAIN
    unsigned char *cp;
    size_t addresscount = 1;
#endif /* MAIN */

    if (!rfc822_is_address_header(buf))
        return(buf);

    hostlen = strlen((const char *)host);

#ifndef MAIN
    if (outlevel >= O_DEBUG)
        report_build(stdout, GT_("About to rewrite %s"), buf);

    /*
     * Make room to hack the address; buf must be malloced.  Every comma or
     * whitespace character is counted as a potential address boundary, which
     * over-estimates the number of "@host" insertions.
     */
    for (cp = buf; *cp; cp++)
        if (*cp == ',' || isspace(*cp))
            addresscount++;
    buf = (unsigned char *)xrealloc(buf, strlen((const char *)buf)
                                    + addresscount * (hostlen + 1) + 1);
#endif /* MAIN */

    /*
     * This is going to foo up on some ill-formed addresses.
     * Note that we don't rewrite the fake address <> in order to
     * avoid screwing up bounce suppression with a null Return-Path.
     *
     * States: 0 = before the header colon, 1 = scanning for addresses,
     * 2 = inside a double-quoted string, 3 = inside <>.
     */

    parendepth = state = 0;
    has_host_part = has_bare_name_part = FALSE;
    for (from = buf; *from; from++)
    {
#ifdef MAIN
        if (verbose)
        {
            printf("state %d: %s", state, (char *)buf);
            /* '*' consumes an int, not a ptrdiff_t */
            printf("%*s^\n", (int)(from - buf + 10), " ");
        }
#endif /* MAIN */
        /* parentheses only count as comment delimiters outside strings */
        if (state != 2)
        {
            if (*from == '(')
                ++parendepth;
            else if (*from == ')')
                --parendepth;
        }

        if (!parendepth && !has_host_part)
            switch (state)
            {
            case 0:     /* before header colon */
                if (*from == ':')
                    state = 1;
                break;

            case 1:     /* we've seen the colon, we're looking for addresses */
                if (!isspace(*from))
                    last_nws = *from;
                if (*from == '<')
                    state = 3;
                else if (*from == '@' || *from == '!')
                    has_host_part = TRUE;
                else if (*from == '"')
                    state = 2;
                /*
                 * Not expanding on last non-WS == ';' deals with groupnames,
                 * an obscure misfeature described in sections
                 * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
                 */
                else if ((*from == ',' || HEADER_END(from))
                         && has_bare_name_part
                         && !has_host_part
                         && last_nws != ';')
                {
                    unsigned char *p = from;    /* the delimiter we stopped on */

                    /* insert before a trailing comment if there was one */
                    if (parens_from)
                        from = parens_from;
                    /* back up to just past the last character of the name */
                    while (isspace(*from) || (*from == ','))
                        --from;
                    from++;
                    rfc822_insert_host(from, host, hostlen);
                    /* the delimiter has moved right by hostlen + 1 bytes */
                    from = p + hostlen + 1;
                    has_host_part = TRUE;
                }
                else if (from[1] == '('
                         && has_bare_name_part
                         && !has_host_part
                         && last_nws != ';' && last_nws != ')')
                {
                    /* remember where the comment after a bare name begins */
                    parens_from = from;
                }
                else if (!isspace(*from))
                    has_bare_name_part = TRUE;
                break;

            case 2:     /* we're in a string */
                if (*from == '"')
                {
                    const unsigned char *bp;
                    int                 bscount;

                    /*
                     * An odd number of preceding backslashes means this quote
                     * is escaped.  The backward scan is bounded by the
                     * opening quote.
                     */
                    bscount = 0;
                    for (bp = from - 1; *bp == '\\'; bp--)
                        bscount++;
                    if (!(bscount % 2))
                        state = 1;
                }
                break;

            case 3:     /* we're in a <>-enclosed address */
                if (*from == '@' || *from == '!')
                    has_host_part = TRUE;
                else if (*from == '>' && (from > buf && from[-1] != '<'))
                {
                    state = 1;
                    if (!has_host_part)
                    {
                        rfc822_insert_host(from, host, hostlen);
                        /* leave from on the relocated '>' */
                        from += hostlen + 1;
                        has_host_part = TRUE;
                    }
                }
                break;

            default:
                break;
            }

        /*
         * If we passed a comma, reset everything.
         * NOTE(review): this runs after the switch, so when the previous
         * address had a host part the character directly following the
         * comma is never examined by the state machine.
         */
        if ((from > buf && from[-1] == ',') && !parendepth) {
          has_host_part = has_bare_name_part = FALSE;
          parens_from = NULL;
        }
    }

#ifndef MAIN
    if (outlevel >= O_DEBUG)
        report_complete(stdout, GT_("Rewritten version is %s\n"), buf);
#endif /* MAIN */
    *length = strlen((const char *)buf);
    return(buf);
}
217
unsigned char *nxtaddr(const unsigned char *hdr /* header to be parsed, NUL to continue previous hdr */)
/*
 * Parse addresses in succession out of a specified RFC822 header.
 *
 * Call once with the header text to get its first address, then repeatedly
 * with NULL to get the following ones.  Returns a pointer to a static buffer
 * holding the address (overwritten by the next call), or NULL when no more
 * addresses are found.  Comments in parentheses are dropped; for
 * "name <addr>" only the part inside the angle brackets is returned.
 * Addresses longer than the static buffer are silently truncated.
 *
 * The parse position and state are kept in static variables, so only one
 * header can be walked at a time.  The header text itself is not copied, so
 * it must stay valid until the walk is finished.
 */
{
    static unsigned char address[BUFSIZ];
    static int tp;                      /* next free slot in address[] */
    static const unsigned char *hp;     /* current position in the header */
    static int  state, oldstate;
#ifdef MAIN
    static const unsigned char *orighdr;
#endif /* MAIN */
    int parendepth = 0;                 /* never live across a return */

#define START_HDR       0       /* before header colon */
#define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
#define BARE_ADDRESS    2       /* collecting address without delimiters */
#define INSIDE_DQUOTE   3       /* inside double quotes */
#define INSIDE_PARENS   4       /* inside parentheses */
#define INSIDE_BRACKETS 5       /* inside bracketed address */
#define ENDIT_ALL       6       /* after last address */

/* index of the next slot to fill; sticks at the last slot when full */
#define NEXTTP()        ((tp < (int)sizeof(address)-1) ? tp++ : tp)

    if (hdr)
    {
        hp = hdr;
        state = START_HDR;
#ifdef MAIN
        orighdr = hdr;
#endif /* MAIN */
        tp = 0;
    }
    else if (hp == NULL)        /* continuation requested before any header */
        return(NULL);

    for (; *hp; hp++)
    {
#ifdef MAIN
        if (verbose)
        {
            printf("state %d: %s", state, (char *)orighdr);
            /* '*' consumes an int, not a ptrdiff_t */
            printf("%*s^\n", (int)(hp - orighdr + 10), " ");
        }
#endif /* MAIN */

        if (state == ENDIT_ALL)         /* after last address */
            return(NULL);
        else if (HEADER_END(hp))
        {
            state = ENDIT_ALL;
            if (tp)
            {
                /* strip trailing whitespace without touching the last
                 * real character and without indexing below address[0] */
                while (tp > 0 && isspace(address[tp - 1]))
                    --tp;
                address[tp] = '\0';
                tp = 0;
                return (address);
            }
            return((unsigned char *)NULL);
        }
        else if (*hp == '\\')           /* handle RFC822 escaping */
        {
            if (state != INSIDE_PARENS)
            {
                address[NEXTTP()] = *hp;        /* take the escape */
                /* take the following character too, but never step onto
                 * the terminating NUL: the loop increment would then run
                 * past the end of the header */
                if (hp[1] != '\0')
                    address[NEXTTP()] = *++hp;
            }
        }
        else switch (state)
        {
        case START_HDR:   /* before header colon */
            if (*hp == ':')
                state = SKIP_JUNK;
            break;

        case SKIP_JUNK:         /* looking for address start */
            if (*hp == '"')     /* quoted string */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')        /* address comment -- ignore */
            {
                parendepth = 1;
                oldstate = SKIP_JUNK;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* begin <address> */
            {
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp != ',' && !isspace(*hp))
            {
                /* back up so this character is re-read as BARE_ADDRESS */
                --hp;
                state = BARE_ADDRESS;
            }
            break;

        case BARE_ADDRESS:      /* collecting address without delimiters */
            if (*hp == ',')     /* end of address */
            {
                if (tp)
                {
                    address[NEXTTP()] = '\0';
                    state = SKIP_JUNK;
                    tp = 0;
                    return(address);
                }
            }
            else if (*hp == '(')        /* beginning of comment */
            {
                parendepth = 1;
                oldstate = BARE_ADDRESS;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* beginning of real address */
            {
                /* whatever was collected so far was only a display name */
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp == '"')        /* quoted word, copy verbatim */
            {
                oldstate = state;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (!isspace(*hp))     /* just take it, ignoring whitespace */
                address[NEXTTP()] = *hp;
            break;

        case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
            address[NEXTTP()] = *hp;
            if (*hp == '"')     /* closing quote: resume where we were */
                state = oldstate;
            break;

        case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
            if (*hp == '(')
                ++parendepth;
            else if (*hp == ')')
                --parendepth;
            if (parendepth == 0)
                state = oldstate;
            break;

        case INSIDE_BRACKETS:   /* possible <>-enclosed address */
            if (*hp == '>')     /* end of address */
            {
                address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                ++hp;           /* resume after the '>' on the next call */
                tp = 0;
                return(address);
            }
            else if (*hp == '<')        /* nested <> */
                tp = 0;
            else if (*hp == '"')        /* quoted address */
            {
                address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else                        /* just copy address */
                address[NEXTTP()] = *hp;
            break;

        default:
            break;
        }
    }

    return(NULL);
}
392
393 #ifdef MAIN
/*
 * Demonstrator helper: run one unfolded header through either reply_hack()
 * (when REPLY is nonzero) or the nxtaddr() address walker, printing the
 * result on stdout.
 */
static void parsebuf(unsigned char *longbuf, int reply)
{
    unsigned char       *addr;
    size_t              rewritten_len;

    if (reply)
    {
        /* rewrite in place; the returned length is not needed here */
        reply_hack(longbuf, "HOSTNAME.NET", &rewritten_len);
        printf("Rewritten buffer: %s", (char *)longbuf);
        return;
    }

    /* first call passes the header, later calls pass NULL to continue */
    for (addr = nxtaddr(longbuf);
         addr != (unsigned char *)NULL;
         addr = nxtaddr((unsigned char *)NULL))
    {
        printf("\t-> \"%s\"\n", (char *)addr);
    }
}
411
412
413
414 int main(int argc, char *argv[])
415 {
416     unsigned char       buf[BUFSIZ], longbuf[BUFSIZ];
417     int                 ch, reply;
418     
419     verbose = reply = FALSE;
420     while ((ch = getopt(argc, argv, "rv")) != EOF)
421         switch(ch)
422         {
423         case 'r':
424             reply = TRUE;
425             break;
426
427         case 'v':
428             verbose = TRUE;
429             break;
430         }
431
432     while (fgets(buf, sizeof(buf)-1, stdin))
433     {
434         if (buf[0] == ' ' || buf[0] == '\t')
435             strcat(longbuf, buf);
436         else if (!strncasecmp("From: ", buf, 6)
437                     || !strncasecmp("To: ", buf, 4)
438                     || !strncasecmp("Reply-", buf, 6)
439                     || !strncasecmp("Cc: ", buf, 4)
440                     || !strncasecmp("Bcc: ", buf, 5))
441             strcpy(longbuf, buf);       
442         else if (longbuf[0])
443         {
444             if (verbose)
445                 fputs(longbuf, stdout);
446             parsebuf(longbuf, reply);
447             longbuf[0] = '\0';
448         }
449     }
450     if (longbuf[0])
451     {
452         if (verbose)
453             fputs(longbuf, stdout);
454         parsebuf(longbuf, reply);
455     }
456     exit(0);
457 }
458 #endif /* MAIN */
459
460 /* rfc822.c end */