Pileus Git - ~andy/fetchmail/blob - rfc822.c
First round of long-delayed bug fixes.
[~andy/fetchmail] / rfc822.c
1 /*****************************************************************************
2
3 NAME:
4    rfc822.c -- code for slicing and dicing RFC822 mail headers
5
6 ENTRY POINTS:
7    nxtaddr() -- parse the next address out of an RFC822 header
8    reply_hack() -- append hostname to local header addresses 
9
10 THEORY:
11    How to parse RFC822 headers in C. This is not a fully conformant
12 implementation of RFC822 or RFC2822, but it has been in production use
13 in a widely-deployed MTA (fetchmail) since 1996 without complaints.
14 Really perverse combinations of quoting and commenting could break it.
15
16 AUTHOR:
17    Eric S. Raymond <esr@thyrsus.com>, 1997.  This source code example
18 is part of fetchmail and the Unix Cookbook, and is released under the
19 MIT license.  Compile with -DMAIN to build the demonstrator.
20
21 ******************************************************************************/
#include  <stdio.h>
#include  <ctype.h>
#include  <string.h>
#include  <strings.h>
#include  <stdlib.h>
#include  <unistd.h>
26
27 #ifndef MAIN
28 #include "fetchmail.h"
29 #include "i18n.h"
30 #else
31 static int verbose;
32 char *program_name = "rfc822";
33 #endif /* MAIN */
34
/* Boolean constants, for builds where no earlier header supplied TRUE. */
#if !defined(TRUE)
#define FALSE 0
#define TRUE 1
#endif

/*
 * HEADER_END(p) -- nonzero when p points at a newline whose successor is
 * neither a space nor a tab, i.e. a newline that is not followed by a
 * continuation line.  Note that p is evaluated more than once.
 */
#define HEADER_END(p)   ((p)[0] == '\n' && !((p)[1] == ' ' || (p)[1] == '\t'))
41
42 unsigned char *reply_hack(buf, host)
43 /* hack message headers so replies will work properly */
44 unsigned char *buf;             /* header to be hacked */
45 const unsigned char *host;      /* server hostname */
46 {
47     unsigned char *from, *cp, last_nws = '\0', *parens_from = NULL;
48     int parendepth, state, has_bare_name_part, has_host_part;
49 #ifndef MAIN
50     int addresscount = 1;
51 #endif /* MAIN */
52
53     if (strncasecmp("From:", buf, 5)
54         && strncasecmp("To:", buf, 3)
55         && strncasecmp("Reply-To:", buf, 9)
56         && strncasecmp("Return-Path:", buf, 12)
57         && strncasecmp("Cc:", buf, 3)
58         && strncasecmp("Bcc:", buf, 4)
59         && strncasecmp("Resent-From:", buf, 12)
60         && strncasecmp("Resent-To:", buf, 10)
61         && strncasecmp("Resent-Cc:", buf, 10)
62         && strncasecmp("Resent-Bcc:", buf, 11)
63         && strncasecmp("Apparently-From:", buf, 16)
64         && strncasecmp("Apparently-To:", buf, 14)
65         && strncasecmp("Sender:", buf, 7)
66         && strncasecmp("Resent-Sender:", buf, 14)
67        ) {
68         return(buf);
69     }
70
71 #ifndef MAIN
72     if (outlevel >= O_DEBUG)
73         report_build(stdout, GT_("About to rewrite %s"), buf);
74
75     /* make room to hack the address; buf must be malloced */
76     for (cp = buf; *cp; cp++)
77         if (*cp == ',' || isspace(*cp))
78             addresscount++;
79     buf = (unsigned char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1);
80 #endif /* MAIN */
81
82     /*
83      * This is going to foo up on some ill-formed addresses.
84      * Note that we don't rewrite the fake address <> in order to
85      * avoid screwing up bounce suppression with a null Return-Path.
86      */
87
88     parendepth = state = 0;
89     has_host_part = has_bare_name_part = FALSE;
90     for (from = buf; *from; from++)
91     {
92 #ifdef MAIN
93         if (verbose)
94         {
95             printf("state %d: %s", state, buf);
96             printf("%*s^\n", from - buf + 10, " ");
97         }
98 #endif /* MAIN */
99         if (state != 2)
100         {
101             if (*from == '(')
102                 ++parendepth;
103             else if (*from == ')')
104                 --parendepth;
105         }
106
107         if (!parendepth && !has_host_part)
108             switch (state)
109             {
110             case 0:     /* before header colon */
111                 if (*from == ':')
112                     state = 1;
113                 break;
114
115             case 1:     /* we've seen the colon, we're looking for addresses */
116                 if (!isspace(*from))
117                     last_nws = *from;
118                 if (*from == '<')
119                     state = 3;
120                 else if (*from == '@' || *from == '!')
121                     has_host_part = TRUE;
122                 else if (*from == '"')
123                     state = 2;
124                 /*
125                  * Not expanding on last non-WS == ';' deals with groupnames,
126                  * an obscure misfeature described in sections
127                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
128                  */
129                 else if ((*from == ',' || HEADER_END(from))
130                          && has_bare_name_part
131                          && !has_host_part
132                          && last_nws != ';')
133                 {
134                     int hostlen;
135                     unsigned char *p;
136
137                     p = from;
138                     if (parens_from)
139                         from = parens_from;
140                     while (isspace(*from) || (*from == ','))
141                         --from;
142                     from++;
143                     hostlen = strlen(host);
144                     for (cp = from + strlen(from); cp >= from; --cp)
145                         cp[hostlen+1] = *cp;
146                     *from++ = '@';
147                     memcpy(from, host, hostlen);
148                     from = p + hostlen + 1;
149                     has_host_part = TRUE;
150                 } 
151                 else if (from[1] == '('
152                          && has_bare_name_part
153                          && !has_host_part
154                          && last_nws != ';' && last_nws != ')')
155                 {
156                     parens_from = from;
157                 } 
158                 else if (!isspace(*from))
159                     has_bare_name_part = TRUE;
160                 break;
161
162             case 2:     /* we're in a string */
163                 if (*from == '"')
164                 {
165                     char        *bp;
166                     int         bscount;
167
168                     bscount = 0;
169                     for (bp = from - 1; *bp == '\\'; bp--)
170                         bscount++;
171                     if (!(bscount % 2))
172                         state = 1;
173                 }
174                 break;
175
176             case 3:     /* we're in a <>-enclosed address */
177                 if (*from == '@' || *from == '!')
178                     has_host_part = TRUE;
179                 else if (*from == '>' && from[-1] != '<')
180                 {
181                     state = 1;
182                     if (!has_host_part)
183                     {
184                         int hostlen;
185
186                         hostlen = strlen(host);
187                         for (cp = from + strlen(from); cp >= from; --cp)
188                             cp[hostlen+1] = *cp;
189                         *from++ = '@';
190                         memcpy(from, host, hostlen);
191                         from += hostlen;
192                         has_host_part = TRUE;
193                     }
194                 }
195                 break;
196             }
197
198         /*
199          * If we passed a comma, reset everything.
200          */
201         if (from[-1] == ',' && !parendepth) {
202           has_host_part = has_bare_name_part = FALSE;
203           parens_from = NULL;
204         }
205     }
206
207 #ifndef MAIN
208     if (outlevel >= O_DEBUG)
209         report_complete(stdout, GT_("Rewritten version is %s\n"), buf);
210 #endif /* MAIN */
211     return(buf);
212 }
213
/*
 * nxtaddr -- parse addresses in succession out of a specified RFC822 header.
 *
 * hdr: header to be parsed, or NULL to continue with the header given on
 *      an earlier call.
 *
 * Returns a pointer to the next address, or NULL when there are no more.
 * The result lives in a static buffer that the next call overwrites, and
 * the scan position is kept in static variables, so only one header can be
 * walked at a time.  Addresses longer than the buffer are truncated.
 */
unsigned char *nxtaddr(const unsigned char *hdr)
{
    static unsigned char address[BUFSIZ];
    static int tp;
    static const unsigned char *hp;
    static int  state, oldstate;
#ifdef MAIN
    static const unsigned char *orighdr;
#endif /* MAIN */
    int parendepth = 0;

#define START_HDR       0       /* before header colon */
#define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
#define BARE_ADDRESS    2       /* collecting address without delimiters */
#define INSIDE_DQUOTE   3       /* inside double quotes */
#define INSIDE_PARENS   4       /* inside parentheses */
#define INSIDE_BRACKETS 5       /* inside bracketed address */
#define ENDIT_ALL       6       /* after last address */

/* index of the next free slot; sticks at the last slot once address is full */
#define NEXTTP()        (((size_t)tp < sizeof(address)-1) ? tp++ : tp)

    if (hdr)
    {
        hp = hdr;
        state = START_HDR;
#ifdef MAIN
        orighdr = hdr;
#endif /* MAIN */
        tp = 0;
    }

    /* asked to continue, but no header has ever been supplied */
    if (hp == NULL)
        return((unsigned char *)NULL);

    for (; *hp; hp++)
    {
#ifdef MAIN
        if (verbose)
        {
            printf("state %d: %s", state, orighdr);
            /* the field width of %*s must be an int, not a ptrdiff_t */
            printf("%*s^\n", (int)(hp - orighdr) + 10, " ");
        }
#endif /* MAIN */

        if (state == ENDIT_ALL)         /* after last address */
            return(NULL);
        else if (HEADER_END(hp))
        {
            state = ENDIT_ALL;
            /*
             * Strip trailing whitespace, never stepping below index 0 even
             * when everything collected so far is whitespace.
             */
            while (tp > 0 && isspace(address[tp - 1]))
                --tp;
            if (tp)
            {
                address[tp] = '\0';
                tp = 0;
                return (address);
            }
            return((unsigned char *)NULL);
        }
        else if (*hp == '\\')           /* handle RFC822 escaping */
        {
            if (state != INSIDE_PARENS)
            {
                address[NEXTTP()] = *hp;        /* take the escape */
                /*
                 * Take the following character too, unless the backslash is
                 * the last thing in the string: consuming the NUL would let
                 * the loop increment run past the end of the header.
                 */
                if (hp[1] != '\0')
                    address[NEXTTP()] = *++hp;
            }
        }
        else switch (state)
        {
        case START_HDR:   /* before header colon */
            if (*hp == ':')
                state = SKIP_JUNK;
            break;

        case SKIP_JUNK:         /* looking for address start */
            if (*hp == '"')     /* quoted string */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')        /* address comment -- ignore */
            {
                parendepth = 1;
                oldstate = SKIP_JUNK;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* begin <address> */
            {
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp != ',' && !isspace(*hp))
            {
                /* back up so this character is re-read as address text */
                --hp;
                state = BARE_ADDRESS;
            }
            break;

        case BARE_ADDRESS:      /* collecting address without delimiters */
            if (*hp == ',')     /* end of address */
            {
                if (tp)
                {
                    address[NEXTTP()] = '\0';
                    state = SKIP_JUNK;
                    tp = 0;
                    /* hp stays on the comma; SKIP_JUNK ignores it next call */
                    return(address);
                }
            }
            else if (*hp == '(')        /* beginning of comment */
            {
                parendepth = 1;
                oldstate = BARE_ADDRESS;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* beginning of real address */
            {
                /* what was collected so far was a display name; drop it */
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp == '"')        /* quoted word, copy verbatim */
            {
                oldstate = state;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (!isspace(*hp))     /* just take it, ignoring whitespace */
                address[NEXTTP()] = *hp;
            break;

        case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
            address[NEXTTP()] = *hp;
            if (*hp == '"')     /* closing quote: resume the previous state */
                state = oldstate;
            break;

        case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
            if (*hp == '(')
                ++parendepth;
            else if (*hp == ')')
                --parendepth;
            if (parendepth == 0)
                state = oldstate;
            break;

        case INSIDE_BRACKETS:   /* possible <>-enclosed address */
            if (*hp == '>')     /* end of address */
            {
                address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                ++hp;           /* returning skips the loop increment */
                tp = 0;
                return(address);
            }
            else if (*hp == '<')        /* nested <> */
                tp = 0;
            else if (*hp == '"')        /* quoted address */
            {
                address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else                        /* just copy address */
                address[NEXTTP()] = *hp;
            break;

        default:
            break;
        }
    }

    return(NULL);
}
388
389 #ifdef MAIN
/*
 * parsebuf -- demonstrator driver for one complete header.
 *
 * With reply set, the header is run through reply_hack() and the buffer is
 * printed afterwards; otherwise every address nxtaddr() finds in it is
 * printed on a line of its own.
 */
static void parsebuf(unsigned char *longbuf, int reply)
{
    unsigned char *addr;

    if (reply)
    {
        reply_hack(longbuf, "HOSTNAME.NET");
        printf("Rewritten buffer: %s", longbuf);
        return;
    }

    /* first call hands over the header, later calls continue with it */
    for (addr = nxtaddr(longbuf);
         addr != (unsigned char *)NULL;
         addr = nxtaddr((unsigned char *)NULL))
    {
        printf("\t-> \"%s\"\n", addr);
    }
}
406
407
408
409 main(int argc, char *argv[])
410 {
411     unsigned char       buf[BUFSIZ], longbuf[BUFSIZ];
412     int                 ch, reply;
413     
414     verbose = reply = FALSE;
415     while ((ch = getopt(argc, argv, "rv")) != EOF)
416         switch(ch)
417         {
418         case 'r':
419             reply = TRUE;
420             break;
421
422         case 'v':
423             verbose = TRUE;
424             break;
425         }
426
427     while (fgets(buf, sizeof(buf)-1, stdin))
428     {
429         if (buf[0] == ' ' || buf[0] == '\t')
430             strcat(longbuf, buf);
431         else if (!strncasecmp("From: ", buf, 6)
432                     || !strncasecmp("To: ", buf, 4)
433                     || !strncasecmp("Reply-", buf, 6)
434                     || !strncasecmp("Cc: ", buf, 4)
435                     || !strncasecmp("Bcc: ", buf, 5))
436             strcpy(longbuf, buf);       
437         else if (longbuf[0])
438         {
439             if (verbose)
440                 fputs(longbuf, stdout);
441             parsebuf(longbuf, reply);
442             longbuf[0] = '\0';
443         }
444     }
445     if (longbuf[0])
446     {
447         if (verbose)
448             fputs(longbuf, stdout);
449         parsebuf(longbuf, reply);
450     }
451 }
452 #endif /* MAIN */
453
454 /* rfc822.c end */