Pileus Git - ~andy/fetchmail/blob - rfc822.c
Prevent a potential remote exploit.
[~andy/fetchmail] / rfc822.c
/*****************************************************************************
2
3 NAME:
4    rfc822.c -- code for slicing and dicing RFC822 mail headers
5
6 ENTRY POINTS:
7    nxtaddr() -- parse the next address out of an RFC822 header
8    reply_hack() -- append hostname to local header addresses 
9
10 THEORY:
11    How to parse RFC822 headers in C. This is not a fully conformant
12 implementation of RFC822 or RFC2822, but it has been in production use
13 in a widely-deployed MTA (fetchmail) since 1996 without complaints.
14 Really perverse combinations of quoting and commenting could break it.
15
16 AUTHOR:
17    Eric S. Raymond <esr@thyrsus.com>, 1997.  This source code example
18 is part of fetchmail and the Unix Cookbook, and is released under the
19 MIT license.  Compile with -DMAIN to build the demonstrator.
20
21 ******************************************************************************/
#include  <stdio.h>
#include  <ctype.h>
#include  <string.h>
#include  <strings.h>   /* strncasecmp() */
#include  <stdlib.h>

#ifndef MAIN
#include "fetchmail.h"
#include "i18n.h"
#else
#include  <unistd.h>    /* getopt() */
static int verbose;
char *program_name = "rfc822";
#endif /* MAIN */

/* Boolean constants, unless an earlier header already supplied them. */
#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif

/*
 * HEADER_END(p) is nonzero when p points at the newline that terminates a
 * header: a '\n' whose next character is neither a space nor a tab.  A
 * newline that IS followed by a space or tab is a folded continuation line
 * and does not end the header.
 *
 * p is evaluated more than once, so do not pass an expression with side
 * effects.  p[1] is only read when p[0] is '\n'.
 */
#define HEADER_END(p)   ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))

42 unsigned char *reply_hack(buf, host, length)
43 /* hack message headers so replies will work properly */
44 unsigned char *buf;             /* header to be hacked */
45 const unsigned char *host;      /* server hostname */
46 int *length;
47 {
48     unsigned char *from, *cp, last_nws = '\0', *parens_from = NULL;
49     int parendepth, state, has_bare_name_part, has_host_part;
50 #ifndef MAIN
51     int addresscount = 1;
52 #endif /* MAIN */
53
54     if (strncasecmp("From:", buf, 5)
55         && strncasecmp("To:", buf, 3)
56         && strncasecmp("Reply-To:", buf, 9)
57         && strncasecmp("Return-Path:", buf, 12)
58         && strncasecmp("Cc:", buf, 3)
59         && strncasecmp("Bcc:", buf, 4)
60         && strncasecmp("Resent-From:", buf, 12)
61         && strncasecmp("Resent-To:", buf, 10)
62         && strncasecmp("Resent-Cc:", buf, 10)
63         && strncasecmp("Resent-Bcc:", buf, 11)
64         && strncasecmp("Apparently-From:", buf, 16)
65         && strncasecmp("Apparently-To:", buf, 14)
66         && strncasecmp("Sender:", buf, 7)
67         && strncasecmp("Resent-Sender:", buf, 14)
68        ) {
69         return(buf);
70     }
71
72 #ifndef MAIN
73     if (outlevel >= O_DEBUG)
74         report_build(stdout, GT_("About to rewrite %s"), buf);
75
76     /* make room to hack the address; buf must be malloced */
77     for (cp = buf; *cp; cp++)
78         if (*cp == ',' || isspace(*cp))
79             addresscount++;
80     buf = (unsigned char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1);
81 #endif /* MAIN */
82
83     /*
84      * This is going to foo up on some ill-formed addresses.
85      * Note that we don't rewrite the fake address <> in order to
86      * avoid screwing up bounce suppression with a null Return-Path.
87      */
88
89     parendepth = state = 0;
90     has_host_part = has_bare_name_part = FALSE;
91     for (from = buf; *from; from++)
92     {
93 #ifdef MAIN
94         if (verbose)
95         {
96             printf("state %d: %s", state, buf);
97             printf("%*s^\n", from - buf + 10, " ");
98         }
99 #endif /* MAIN */
100         if (state != 2)
101         {
102             if (*from == '(')
103                 ++parendepth;
104             else if (*from == ')')
105                 --parendepth;
106         }
107
108         if (!parendepth && !has_host_part)
109             switch (state)
110             {
111             case 0:     /* before header colon */
112                 if (*from == ':')
113                     state = 1;
114                 break;
115
116             case 1:     /* we've seen the colon, we're looking for addresses */
117                 if (!isspace(*from))
118                     last_nws = *from;
119                 if (*from == '<')
120                     state = 3;
121                 else if (*from == '@' || *from == '!')
122                     has_host_part = TRUE;
123                 else if (*from == '"')
124                     state = 2;
125                 /*
126                  * Not expanding on last non-WS == ';' deals with groupnames,
127                  * an obscure misfeature described in sections
128                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
129                  */
130                 else if ((*from == ',' || HEADER_END(from))
131                          && has_bare_name_part
132                          && !has_host_part
133                          && last_nws != ';')
134                 {
135                     int hostlen;
136                     unsigned char *p;
137
138                     p = from;
139                     if (parens_from)
140                         from = parens_from;
141                     while (isspace(*from) || (*from == ','))
142                         --from;
143                     from++;
144                     hostlen = strlen(host);
145                     for (cp = from + strlen(from); cp >= from; --cp)
146                         cp[hostlen+1] = *cp;
147                     *from++ = '@';
148                     memcpy(from, host, hostlen);
149                     from = p + hostlen + 1;
150                     has_host_part = TRUE;
151                 } 
152                 else if (from[1] == '('
153                          && has_bare_name_part
154                          && !has_host_part
155                          && last_nws != ';' && last_nws != ')')
156                 {
157                     parens_from = from;
158                 } 
159                 else if (!isspace(*from))
160                     has_bare_name_part = TRUE;
161                 break;
162
163             case 2:     /* we're in a string */
164                 if (*from == '"')
165                 {
166                     char        *bp;
167                     int         bscount;
168
169                     bscount = 0;
170                     for (bp = from - 1; *bp == '\\'; bp--)
171                         bscount++;
172                     if (!(bscount % 2))
173                         state = 1;
174                 }
175                 break;
176
177             case 3:     /* we're in a <>-enclosed address */
178                 if (*from == '@' || *from == '!')
179                     has_host_part = TRUE;
180                 else if (*from == '>' && (from > buf && from[-1] != '<'))
181                 {
182                     state = 1;
183                     if (!has_host_part)
184                     {
185                         int hostlen;
186
187                         hostlen = strlen(host);
188                         for (cp = from + strlen(from); cp >= from; --cp)
189                             cp[hostlen+1] = *cp;
190                         *from++ = '@';
191                         memcpy(from, host, hostlen);
192                         from += hostlen;
193                         has_host_part = TRUE;
194                     }
195                 }
196                 break;
197             }
198
199         /*
200          * If we passed a comma, reset everything.
201          */
202         if ((from > buf && from[-1] == ',') && !parendepth) {
203           has_host_part = has_bare_name_part = FALSE;
204           parens_from = NULL;
205         }
206     }
207
208 #ifndef MAIN
209     if (outlevel >= O_DEBUG)
210         report_complete(stdout, GT_("Rewritten version is %s\n"), buf);
211 #endif /* MAIN */
212     *length = strlen(buf);
213     return(buf);
214 }
215
/*
 * Nonzero when p points at the newline that terminates the header, i.e. a
 * '\n' not followed by a space or tab (a newline followed by whitespace is
 * a folded continuation).  Same test as the HEADER_END() macro, as a typed
 * function that evaluates its argument once.
 */
static int nxtaddr_header_end(const unsigned char *p)
{
    return p[0] == '\n' && p[1] != ' ' && p[1] != '\t';
}

/*
 * nxtaddr() -- parse addresses in succession out of a specified RFC822 header.
 *
 * hdr -- header to be parsed, or NULL to continue with the header given on
 *        the previous call.  Only the pointer is stored, so the header text
 *        must stay valid while it is being iterated.
 *
 * Returns the next address, or NULL when there are no more (also when
 * called with NULL before any header was supplied).  The result lives in
 * a static buffer of BUFSIZ bytes that the next call overwrites; longer
 * addresses are truncated.  Because all parser state is static the
 * function is not reentrant.
 *
 * Parenthesized comments are dropped, quoted strings are copied verbatim
 * including the quotes, and a <>-enclosed address replaces whatever was
 * collected before the '<'.  An address is only returned once a ',', a
 * '>' or the terminating newline is seen, so a header that lacks the final
 * newline yields nothing for a trailing bare address.
 */
unsigned char *nxtaddr(const unsigned char *hdr)
{
    static unsigned char address[BUFSIZ];
    static int tp;                      /* next free index in address[] */
    static const unsigned char *hp;     /* scan position in the header */
    static int  state, oldstate;
#ifdef MAIN
    static const unsigned char *orighdr;
#endif /* MAIN */
    int parendepth = 0;

#define START_HDR       0       /* before header colon */
#define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
#define BARE_ADDRESS    2       /* collecting address without delimiters */
#define INSIDE_DQUOTE   3       /* inside double quotes */
#define INSIDE_PARENS   4       /* inside parentheses */
#define INSIDE_BRACKETS 5       /* inside bracketed address */
#define ENDIT_ALL       6       /* after last address */

/* index to store at; sticks at the last slot once the buffer is full */
#define NEXTTP()        ((tp < (int)sizeof(address)-1) ? tp++ : tp)

    if (hdr)
    {
        hp = hdr;
        state = START_HDR;
#ifdef MAIN
        orighdr = hdr;
#endif /* MAIN */
        tp = 0;
    }

    /* a "continue" request before any header was ever supplied */
    if (hp == NULL)
        return((unsigned char *)NULL);

    for (; *hp; hp++)
    {
#ifdef MAIN
        if (verbose)
        {
            printf("state %d: %s", state, orighdr);
            /* the '*' field width must be an int, not a ptrdiff_t */
            printf("%*s^\n", (int)(hp - orighdr) + 10, " ");
        }
#endif /* MAIN */

        if (state == ENDIT_ALL)         /* after last address */
            return((unsigned char *)NULL);
        else if (nxtaddr_header_end(hp))
        {
            state = ENDIT_ALL;
            if (tp)
            {
                /*
                 * Strip trailing whitespace.  The index is checked first so
                 * that text consisting only of whitespace cannot drive it
                 * below zero; the result is then the empty string.
                 */
                while (tp > 0 && isspace(address[tp - 1]))
                    tp--;
                address[tp] = '\0';
                tp = 0;
                return(address);
            }
            return((unsigned char *)NULL);
        }
        else if (*hp == '\\')           /* handle RFC822 escaping */
        {
            if (state != INSIDE_PARENS)
            {
                address[NEXTTP()] = *hp;        /* take the escape */
                /*
                 * Take the escaped character too, unless the backslash is
                 * the last character of the string: stepping over the
                 * terminating NUL would run the scan off the buffer.
                 */
                if (hp[1] != '\0')
                {
                    ++hp;
                    address[NEXTTP()] = *hp;
                }
            }
        }
        else switch (state)
        {
        case START_HDR:   /* before header colon */
            if (*hp == ':')
                state = SKIP_JUNK;
            break;

        case SKIP_JUNK:         /* looking for address start */
            if (*hp == '"')     /* quoted string */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')        /* address comment -- ignore */
            {
                parendepth = 1;
                oldstate = SKIP_JUNK;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* begin <address> */
            {
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp != ',' && !isspace(*hp))
            {
                /* back up so this character is re-read as address text */
                --hp;
                state = BARE_ADDRESS;
            }
            break;

        case BARE_ADDRESS:      /* collecting address without delimiters */
            if (*hp == ',')     /* end of address */
            {
                if (tp)
                {
                    address[NEXTTP()] = '\0';
                    state = SKIP_JUNK;
                    tp = 0;
                    /* hp stays on the ','; SKIP_JUNK skips it next call */
                    return(address);
                }
            }
            else if (*hp == '(')        /* beginning of comment */
            {
                parendepth = 1;
                oldstate = BARE_ADDRESS;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* beginning of real address */
            {
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp == '"')        /* quoted word, copy verbatim */
            {
                oldstate = state;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (!isspace(*hp))     /* just take it, ignoring whitespace */
                address[NEXTTP()] = *hp;
            break;

        case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
            address[NEXTTP()] = *hp;
            if (*hp == '"')             /* closing quote */
                state = oldstate;
            break;

        case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
            if (*hp == '(')
                ++parendepth;
            else if (*hp == ')')
                --parendepth;
            if (parendepth == 0)
                state = oldstate;
            break;

        case INSIDE_BRACKETS:   /* possible <>-enclosed address */
            if (*hp == '>')     /* end of address */
            {
                address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                ++hp;           /* the return skips the loop's own hp++ */
                tp = 0;
                return(address);
            }
            else if (*hp == '<')        /* nested <> */
                tp = 0;
            else if (*hp == '"')        /* quoted address */
            {
                address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else                        /* just copy address */
                address[NEXTTP()] = *hp;
            break;

        default:
            break;
        }
    }

    return((unsigned char *)NULL);
}

#ifdef MAIN
/*
 * Demonstrator helper: run one complete (unfolded) header through either
 * reply_hack() (reply != 0) or nxtaddr() (reply == 0) and print the result.
 */
static void parsebuf(unsigned char *longbuf, int reply)
{
    unsigned char       *cp;

    if (reply)
    {
        static const unsigned char demo_host[] = "HOSTNAME.NET";
        const size_t    hostlen = sizeof(demo_host) - 1;
        const size_t    len = strlen((const char *)longbuf);
        size_t          addresscount = 1;
        unsigned char   *work;
        int             length = 0;

        /*
         * With -DMAIN reply_hack() rewrites in place and does not grow the
         * buffer, so hand it a copy with room for one "@host" per comma or
         * whitespace character (the same bound the non-MAIN build uses).
         */
        for (cp = longbuf; *cp; cp++)
            if (*cp == ',' || isspace(*cp))
                addresscount++;
        work = malloc(len + addresscount * (hostlen + 1) + 1);
        if (work == NULL)
        {
            perror(program_name);
            exit(EXIT_FAILURE);
        }
        memcpy(work, longbuf, len + 1);

        /* reply_hack() takes three arguments; the length is not used here */
        work = reply_hack(work, demo_host, &length);
        printf("Rewritten buffer: %s", work);
        free(work);
    }
    else
    {
        for (cp = nxtaddr(longbuf);
             cp != (unsigned char *)NULL;
             cp = nxtaddr((unsigned char *)NULL))
            printf("\t-> \"%s\"\n", cp);
    }
}



/*
 * Demonstrator: read mail headers from stdin, glue folded continuation
 * lines together, and feed each From:/To:/Reply-*:/Cc:/Bcc: header to
 * parsebuf().  Options: -r rewrite with reply_hack() instead of listing
 * addresses with nxtaddr(); -v verbose tracing.
 */
int main(int argc, char *argv[])
{
    unsigned char       buf[BUFSIZ], longbuf[BUFSIZ];
    int                 ch, reply;

    verbose = reply = FALSE;
    longbuf[0] = '\0';          /* no header collected yet */

    while ((ch = getopt(argc, argv, "rv")) != -1)
        switch(ch)
        {
        case 'r':
            reply = TRUE;
            break;

        case 'v':
            verbose = TRUE;
            break;

        default:                /* getopt() has already complained */
            break;
        }

    while (fgets((char *)buf, sizeof(buf), stdin))
    {
        if (buf[0] == ' ' || buf[0] == '\t')
        {
            /* continuation line: append only if it still fits */
            const size_t used = strlen((const char *)longbuf);
            const size_t add = strlen((const char *)buf);

            if (used + add < sizeof(longbuf))
                memcpy(longbuf + used, buf, add + 1);
            else
                fprintf(stderr, "%s: folded header too long, continuation dropped\n",
                        program_name);
        }
        else if (!strncasecmp("From: ", (const char *)buf, 6)
                    || !strncasecmp("To: ", (const char *)buf, 4)
                    || !strncasecmp("Reply-", (const char *)buf, 6)
                    || !strncasecmp("Cc: ", (const char *)buf, 4)
                    || !strncasecmp("Bcc: ", (const char *)buf, 5))
            strcpy((char *)longbuf, (const char *)buf);
        else if (longbuf[0])
        {
            if (verbose)
                fputs((const char *)longbuf, stdout);
            parsebuf(longbuf, reply);
            longbuf[0] = '\0';
        }
    }
    /* flush a header that was still pending at end of input */
    if (longbuf[0])
    {
        if (verbose)
            fputs((const char *)longbuf, stdout);
        parsebuf(longbuf, reply);
    }
    return 0;
}
#endif /* MAIN */

/* rfc822.c end */