]> Pileus Git - ~andy/fetchmail/blob - rfc822.c
Credit John Beck's fixes.
[~andy/fetchmail] / rfc822.c
1 /*****************************************************************************
2
3 NAME:
4    rfc822.c -- code for slicing and dicing RFC822 mail headers
5
6 ENTRY POINTS:
7    nxtaddr() -- parse the next address out of an RFC822 header
8    reply_hack() -- append hostname to local header addresses 
9
10 THEORY:
11    How to parse RFC822 headers in C. This is not a fully conformant
12 implementation of RFC822 or RFC2822, but it has been in production use
13 in a widely-deployed MTA (fetchmail) since 1996 without complaints.
14 Really perverse combinations of quoting and commenting could break it.
15
16 AUTHOR:
17    Eric S. Raymond <esr@thyrsus.com>, 1997.  This source code example
18 is part of fetchmail and the Unix Cookbook, and is released under the
19 MIT license.  Compile with -DMAIN to build the demonstrator.
20
21 ******************************************************************************/
22
23 #include "config.h"
24
25 #include  <stdio.h>
26 #include  <ctype.h>
27 #include  <string.h>
28 #include  <strings.h>
29 #include  <stdlib.h>
30
31 #include "fetchmail.h"
32 #include "sdump.h"
33
34 #ifndef MAIN
35 #include "i18n.h"
36 #else
37 #include  <unistd.h>
38 static int verbose;
39 const char *program_name = "rfc822";
40 #endif /* MAIN */
41
42 #ifndef TRUE
43 #define TRUE 1
44 #define FALSE 0
45 #endif
46
47 #define HEADER_END(p)   ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))
48
49 #define BEFORE_EOL(s)   (strcspn((s), "\r\n"))
50
51 char *reply_hack(
52         char *buf               /* header to be hacked */,
53         const char *host        /* server hostname */,
54         size_t *length)
55 /* hack message headers so replies will work properly */
56 {
57     char *from, *cp, last_nws = '\0', *parens_from = NULL;
58     int parendepth, state, has_bare_name_part, has_host_part;
59 #ifndef MAIN
60     int addresscount = 1;
61 #endif /* MAIN */
62
63     if (strncasecmp("From:", buf, 5)
64         && strncasecmp("To:", buf, 3)
65         && strncasecmp("Reply-To:", buf, 9)
66         && strncasecmp("Return-Path:", buf, 12)
67         && strncasecmp("Cc:", buf, 3)
68         && strncasecmp("Bcc:", buf, 4)
69         && strncasecmp("Resent-From:", buf, 12)
70         && strncasecmp("Resent-To:", buf, 10)
71         && strncasecmp("Resent-Cc:", buf, 10)
72         && strncasecmp("Resent-Bcc:", buf, 11)
73         && strncasecmp("Apparently-From:", buf, 16)
74         && strncasecmp("Apparently-To:", buf, 14)
75         && strncasecmp("Sender:", buf, 7)
76         && strncasecmp("Resent-Sender:", buf, 14)
77        ) {
78         return(buf);
79     }
80
81 #ifndef MAIN
82     if (outlevel >= O_DEBUG) {
83         report_build(stdout, GT_("About to rewrite %s...\n"), (cp = sdump(buf, BEFORE_EOL(buf))));
84         xfree(cp);
85     }
86
87     /* make room to hack the address; buf must be malloced */
88     for (cp = buf; *cp; cp++)
89         if (*cp == ',' || isspace((unsigned char)*cp))
90             addresscount++;
91     buf = (char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1);
92 #endif /* MAIN */
93
94     /*
95      * This is going to foo up on some ill-formed addresses.
96      * Note that we don't rewrite the fake address <> in order to
97      * avoid screwing up bounce suppression with a null Return-Path.
98      */
99
100     parendepth = state = 0;
101     has_host_part = has_bare_name_part = FALSE;
102     for (from = buf; *from; from++)
103     {
104 #ifdef MAIN
105         if (verbose)
106         {
107             printf("state %d: %s", state, buf);
108             printf("%*s^\n", (int)(from - buf + 10), " ");
109         }
110 #endif /* MAIN */
111         if (state != 2)
112         {
113             if (*from == '(')
114                 ++parendepth;
115             else if (*from == ')')
116                 --parendepth;
117         }
118
119         if (!parendepth && !has_host_part)
120             switch (state)
121             {
122             case 0:     /* before header colon */
123                 if (*from == ':')
124                     state = 1;
125                 break;
126
127             case 1:     /* we've seen the colon, we're looking for addresses */
128                 if (!isspace((unsigned char)*from))
129                     last_nws = *from;
130                 if (*from == '<')
131                     state = 3;
132                 else if (*from == '@' || *from == '!')
133                     has_host_part = TRUE;
134                 else if (*from == '"')
135                     state = 2;
136                 /*
137                  * Not expanding on last non-WS == ';' deals with groupnames,
138                  * an obscure misfeature described in sections
139                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
140                  */
141                 else if ((*from == ',' || HEADER_END(from))
142                          && has_bare_name_part
143                          && !has_host_part
144                          && last_nws != ';')
145                 {
146                     int hostlen;
147                     char *p;
148
149                     p = from;
150                     if (parens_from)
151                         from = parens_from;
152                     while (isspace((unsigned char)*from) || (*from == ','))
153                         --from;
154                     from++;
155                     hostlen = strlen(host);
156                     for (cp = from + strlen(from); cp >= from; --cp)
157                         cp[hostlen+1] = *cp;
158                     *from++ = '@';
159                     memcpy(from, host, hostlen);
160                     from = p + hostlen + 1;
161                     has_host_part = TRUE;
162                 } 
163                 else if (from[1] == '('
164                          && has_bare_name_part
165                          && !has_host_part
166                          && last_nws != ';' && last_nws != ')')
167                 {
168                     parens_from = from;
169                 } 
170                 else if (!isspace((unsigned char)*from))
171                     has_bare_name_part = TRUE;
172                 break;
173
174             case 2:     /* we're in a string */
175                 if (*from == '"')
176                 {
177                     char        *bp;
178                     int         bscount;
179
180                     bscount = 0;
181                     for (bp = from - 1; *bp == '\\'; bp--)
182                         bscount++;
183                     if (!(bscount % 2))
184                         state = 1;
185                 }
186                 break;
187
188             case 3:     /* we're in a <>-enclosed address */
189                 if (*from == '@' || *from == '!')
190                     has_host_part = TRUE;
191                 else if (*from == '>' && (from > buf && from[-1] != '<'))
192                 {
193                     state = 1;
194                     if (!has_host_part)
195                     {
196                         int hostlen;
197
198                         hostlen = strlen(host);
199                         for (cp = from + strlen(from); cp >= from; --cp)
200                             cp[hostlen+1] = *cp;
201                         *from++ = '@';
202                         memcpy(from, host, hostlen);
203                         from += hostlen;
204                         has_host_part = TRUE;
205                     }
206                 }
207                 break;
208             }
209
210         /*
211          * If we passed a comma, reset everything.
212          */
213         if ((from > buf && from[-1] == ',') && !parendepth) {
214           has_host_part = has_bare_name_part = FALSE;
215           parens_from = NULL;
216         }
217     }
218
219 #ifndef MAIN
220     if (outlevel >= O_DEBUG) {
221         report_complete(stdout, GT_("...rewritten version is %s.\n"),
222                         (cp = sdump(buf, BEFORE_EOL(buf))));
223         xfree(cp)
224     }
225
226 #endif /* MAIN */
227     *length = strlen(buf);
228     return(buf);
229 }
230
/*
 * Parse addresses in succession out of a specified RFC822 header.
 *
 * Call once with hdr pointing at the header (starting at the header name)
 * to obtain the first address, then repeatedly with hdr == NULL to obtain
 * the following ones.  Returns NULL when there are no more addresses.
 *
 * The returned pointer refers to a static buffer that is overwritten by
 * the next call; parse position and state are static too, so only one
 * header can be walked at a time.  Addresses longer than the buffer are
 * silently truncated (see NEXTTP).
 *
 * A pending bare address is only flushed by a ',' or by the end-of-header
 * newline, so a header that ends without a newline does not yield its
 * last bare address.
 */
char *nxtaddr(const char *hdr /* header to be parsed, NULL to continue previous hdr */)
{
    static char address[BUFSIZ];
    static size_t tp;
    static const char *hp;
    static int  state, oldstate;
#ifdef MAIN
    static const char *orighdr;
#endif /* MAIN */
    int parendepth = 0;

#define START_HDR       0       /* before header colon */
#define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
#define BARE_ADDRESS    2       /* collecting address without delimiters */
#define INSIDE_DQUOTE   3       /* inside double quotes */
#define INSIDE_PARENS   4       /* inside parentheses */
#define INSIDE_BRACKETS 5       /* inside bracketed address */
#define ENDIT_ALL       6       /* after last address */

/* yield the current write index, advancing it unless the buffer is full */
#define NEXTTP()        ((tp < sizeof(address)-1) ? tp++ : tp)

    if (hdr)
    {
        hp = hdr;
        state = START_HDR;
#ifdef MAIN
        orighdr = hdr;
#endif /* MAIN */
        tp = 0;
    }

    if (!hp) return NULL;

    for (; *hp; hp++)
    {
#ifdef MAIN
        if (verbose)
        {
            printf("state %d: %s", state, orighdr);
            printf("%*s^\n", (int)(hp - orighdr + 10), " ");
        }
#endif /* MAIN */

        if (state == ENDIT_ALL)         /* after last address */
            return(NULL);
        else if (HEADER_END(hp))
        {
            state = ENDIT_ALL;
            if (tp)
            {
                /* strip trailing whitespace from the pending address */
                while (tp > 0 && isspace((unsigned char)address[tp - 1]))
                    tp--;
                address[tp] = '\0';
                tp = 0;
                return (address);
            }
            return(NULL);
        }
        else if (*hp == '\\')           /* handle RFC822 escaping */
        {
            if (state != INSIDE_PARENS)
            {
                address[NEXTTP()] = *hp;        /* take the escape */
                /*
                 * Take the escaped character too, but never step onto the
                 * terminating NUL: the loop increment would then move hp
                 * past the end of the string.
                 */
                if (hp[1] != '\0')
                    address[NEXTTP()] = *++hp;
            }
        }
        else switch (state)
        {
        case START_HDR:   /* before header colon */
            if (*hp == ':')
                state = SKIP_JUNK;
            break;

        case SKIP_JUNK:         /* looking for address start */
            if (*hp == '"')     /* quoted string */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')        /* address comment -- ignore */
            {
                parendepth = 1;
                oldstate = SKIP_JUNK;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* begin <address> */
            {
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp != ',' && !isspace((unsigned char)*hp))
            {
                /* re-read this character in BARE_ADDRESS state */
                --hp;
                state = BARE_ADDRESS;
            }
            break;

        case BARE_ADDRESS:      /* collecting address without delimiters */
            if (*hp == ',')     /* end of address */
            {
                if (tp)
                {
                    address[NEXTTP()] = '\0';
                    state = SKIP_JUNK;
                    tp = 0;
                    /* hp stays on the ','; SKIP_JUNK discards it next call */
                    return(address);
                }
            }
            else if (*hp == '(')        /* beginning of comment */
            {
                parendepth = 1;
                oldstate = BARE_ADDRESS;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* beginning of real address */
            {
                /* whatever was collected so far was a display name */
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp == '"')        /* quoted word, copy verbatim */
            {
                oldstate = state;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (!isspace((unsigned char)*hp))      /* just take it, ignoring whitespace */
                address[NEXTTP()] = *hp;
            break;

        case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
            address[NEXTTP()] = *hp;
            if (*hp == '"')
                state = oldstate;
            break;

        case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
            if (*hp == '(')
                ++parendepth;
            else if (*hp == ')')
                --parendepth;
            if (parendepth == 0)
                state = oldstate;
            break;

        case INSIDE_BRACKETS:   /* possible <>-enclosed address */
            if (*hp == '>')     /* end of address */
            {
                address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                ++hp;           /* step past the '>' before returning */
                tp = 0;
                return(address);
            }
            else if (*hp == '<')        /* nested <> */
                tp = 0;
            else if (*hp == '"')        /* quoted address */
            {
                address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else                        /* just copy address */
                address[NEXTTP()] = *hp;
            break;

        default:
            break;
        }
    }

    return(NULL);
}
402
403 #ifdef MAIN
/*
 * Demonstrator helper: run one unfolded header through the library.
 *
 * With reply set, the header is rewritten by reply_hack() using the fixed
 * host name "HOSTNAME.NET" and the result is printed.  Otherwise every
 * address that nxtaddr() extracts from the header is printed on its own
 * line.
 *
 * NOTE: in MAIN builds reply_hack() grows the text in place, so longbuf
 * must have spare room for the inserted host names.
 */
static void parsebuf(char *longbuf, int reply)
{
    if (reply)
    {
        size_t      ignored_len;        /* reply_hack() wants somewhere to store the length */

        reply_hack(longbuf, "HOSTNAME.NET", &ignored_len);
        printf("Rewritten buffer: %s", longbuf);
    }
    else
    {
        const char  *addr;

        /* first call primes the parser, NULL continues the same header */
        for (addr = nxtaddr(longbuf); addr != NULL; addr = nxtaddr(NULL))
            printf("\t-> \"%s\"\n", addr);
    }
}
421
422
423
424 int main(int argc, char *argv[])
425 {
426     char        buf[BUFSIZ], longbuf[BUFSIZ];
427     int         ch, reply;
428     
429     verbose = reply = FALSE;
430     while ((ch = getopt(argc, argv, "rv")) != EOF)
431         switch(ch)
432         {
433         case 'r':
434             reply = TRUE;
435             break;
436
437         case 'v':
438             verbose = TRUE;
439             break;
440         }
441
442     longbuf[0] = '\0';
443
444     while (fgets(buf, sizeof(buf)-1, stdin))
445     {
446         if (buf[0] == ' ' || buf[0] == '\t')
447             strlcat(longbuf, buf, sizeof(longbuf));
448         else if (!strncasecmp("From: ", buf, 6)
449                     || !strncasecmp("To: ", buf, 4)
450                     || !strncasecmp("Reply-", buf, 6)
451                     || !strncasecmp("Cc: ", buf, 4)
452                     || !strncasecmp("Bcc: ", buf, 5))
453             strlcpy(longbuf, buf, sizeof(longbuf));
454         else if (longbuf[0])
455         {
456             if (verbose)
457                 fputs(longbuf, stdout);
458             parsebuf(longbuf, reply);
459             longbuf[0] = '\0';
460         }
461     }
462     if (longbuf[0])
463     {
464         if (verbose)
465             fputs(longbuf, stdout);
466         parsebuf(longbuf, reply);
467     }
468     exit(0);
469 }
470 #endif /* MAIN */
471
472 /* rfc822.c end */