]> Pileus Git - ~andy/fetchmail/blob - rfc822.c
Fix typo repsonsible -> responsible.
[~andy/fetchmail] / rfc822.c
1 /*****************************************************************************
2
3 NAME:
4    rfc822.c -- code for slicing and dicing RFC822 mail headers
5
6 ENTRY POINTS:
7    nxtaddr() -- parse the next address out of an RFC822 header
8    reply_hack() -- append hostname to local header addresses 
9
10 THEORY:
11    How to parse RFC822 headers in C. This is not a fully conformant
12 implementation of RFC822 or RFC2822, but it has been in production use
13 in a widely-deployed MTA (fetchmail) since 1996 without complaints.
14 Really perverse combinations of quoting and commenting could break it.
15
16 AUTHOR:
17    Eric S. Raymond <esr@thyrsus.com>, 1997.  This source code example
18 is part of fetchmail and the Unix Cookbook, and is released under the
19 MIT license.  Compile with -DMAIN to build the demonstrator.
20
21 ******************************************************************************/
22
23 #define _XOPEN_SOURCE 600
24 #define __BSD_VISIBLE 1
25
26 #include "config.h"
27 #include "fetchmail.h"
28
29 #include  <stdio.h>
30 #include  <ctype.h>
31 #include  <string.h>
32 #include  <strings.h>
33 #include  <stdlib.h>
34
35 #include "sdump.h"
36
37 #ifndef MAIN
38 #include "gettext.h"
39 #else
40 #include  <unistd.h>
41 static int verbose;
42 const char *program_name = "rfc822";
43 #endif /* MAIN */
44
45 #ifndef TRUE
46 #define TRUE 1
47 #define FALSE 0
48 #endif
49
/*
 * True when p points at a newline that is NOT followed by a space or tab,
 * i.e. a newline that really ends the header instead of folding it onto
 * a continuation line.  Evaluates p more than once.
 */
#define HEADER_END(p)   ((p)[0] == '\n' && (p)[1] != ' ' && (p)[1] != '\t')

/* Count of characters in s that precede the first CR or LF (or the NUL). */
#define BEFORE_EOL(s)   (strcspn((s), "\r\n"))
53
54 char *reply_hack(
55         char *buf               /* header to be hacked */,
56         const char *host        /* server hostname */,
57         size_t *length)
58 /* hack message headers so replies will work properly */
59 {
60     char *from, *cp, last_nws = '\0', *parens_from = NULL;
61     int parendepth, state, has_bare_name_part, has_host_part;
62 #ifndef MAIN
63     int addresscount = 1;
64 #endif /* MAIN */
65
66     if (strncasecmp("From:", buf, 5)
67         && strncasecmp("To:", buf, 3)
68         && strncasecmp("Reply-To:", buf, 9)
69         && strncasecmp("Return-Path:", buf, 12)
70         && strncasecmp("Cc:", buf, 3)
71         && strncasecmp("Bcc:", buf, 4)
72         && strncasecmp("Resent-From:", buf, 12)
73         && strncasecmp("Resent-To:", buf, 10)
74         && strncasecmp("Resent-Cc:", buf, 10)
75         && strncasecmp("Resent-Bcc:", buf, 11)
76         && strncasecmp("Apparently-From:", buf, 16)
77         && strncasecmp("Apparently-To:", buf, 14)
78         && strncasecmp("Sender:", buf, 7)
79         && strncasecmp("Resent-Sender:", buf, 14)
80        ) {
81         return(buf);
82     }
83
84 #ifndef MAIN
85     if (outlevel >= O_DEBUG) {
86         report_build(stdout, GT_("About to rewrite %s...\n"), (cp = sdump(buf, BEFORE_EOL(buf))));
87         xfree(cp);
88     }
89
90     /* make room to hack the address; buf must be malloced */
91     for (cp = buf; *cp; cp++)
92         if (*cp == ',' || isspace((unsigned char)*cp))
93             addresscount++;
94     buf = (char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1);
95 #endif /* MAIN */
96
97     /*
98      * This is going to foo up on some ill-formed addresses.
99      * Note that we don't rewrite the fake address <> in order to
100      * avoid screwing up bounce suppression with a null Return-Path.
101      */
102
103     parendepth = state = 0;
104     has_host_part = has_bare_name_part = FALSE;
105     for (from = buf; *from; from++)
106     {
107 #ifdef MAIN
108         if (verbose)
109         {
110             printf("state %d: %s", state, buf);
111             printf("%*s^\n", (int)(from - buf + 10), " ");
112         }
113 #endif /* MAIN */
114         if (state != 2)
115         {
116             if (*from == '(')
117                 ++parendepth;
118             else if (*from == ')')
119                 --parendepth;
120         }
121
122         if (!parendepth && !has_host_part)
123             switch (state)
124             {
125             case 0:     /* before header colon */
126                 if (*from == ':')
127                     state = 1;
128                 break;
129
130             case 1:     /* we've seen the colon, we're looking for addresses */
131                 if (!isspace((unsigned char)*from))
132                     last_nws = *from;
133                 if (*from == '<')
134                     state = 3;
135                 else if (*from == '@' || *from == '!')
136                     has_host_part = TRUE;
137                 else if (*from == '"')
138                     state = 2;
139                 /*
140                  * Not expanding on last non-WS == ';' deals with groupnames,
141                  * an obscure misfeature described in sections
142                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
143                  */
144                 else if ((*from == ',' || HEADER_END(from))
145                          && has_bare_name_part
146                          && !has_host_part
147                          && last_nws != ';')
148                 {
149                     int hostlen;
150                     char *p;
151
152                     p = from;
153                     if (parens_from)
154                         from = parens_from;
155                     while (isspace((unsigned char)*from) || (*from == ','))
156                         --from;
157                     from++;
158                     hostlen = strlen(host);
159                     for (cp = from + strlen(from); cp >= from; --cp)
160                         cp[hostlen+1] = *cp;
161                     *from++ = '@';
162                     memcpy(from, host, hostlen);
163                     from = p + hostlen + 1;
164                     has_host_part = TRUE;
165                 } 
166                 else if (from[1] == '('
167                          && has_bare_name_part
168                          && !has_host_part
169                          && last_nws != ';' && last_nws != ')')
170                 {
171                     parens_from = from;
172                 } 
173                 else if (!isspace((unsigned char)*from))
174                     has_bare_name_part = TRUE;
175                 break;
176
177             case 2:     /* we're in a string */
178                 if (*from == '"')
179                 {
180                     char        *bp;
181                     int         bscount;
182
183                     bscount = 0;
184                     for (bp = from - 1; *bp == '\\'; bp--)
185                         bscount++;
186                     if (!(bscount % 2))
187                         state = 1;
188                 }
189                 break;
190
191             case 3:     /* we're in a <>-enclosed address */
192                 if (*from == '@' || *from == '!')
193                     has_host_part = TRUE;
194                 else if (*from == '>' && (from > buf && from[-1] != '<'))
195                 {
196                     state = 1;
197                     if (!has_host_part)
198                     {
199                         int hostlen;
200
201                         hostlen = strlen(host);
202                         for (cp = from + strlen(from); cp >= from; --cp)
203                             cp[hostlen+1] = *cp;
204                         *from++ = '@';
205                         memcpy(from, host, hostlen);
206                         from += hostlen;
207                         has_host_part = TRUE;
208                     }
209                 }
210                 break;
211             }
212
213         /*
214          * If we passed a comma, reset everything.
215          */
216         if ((from > buf && from[-1] == ',') && !parendepth) {
217           has_host_part = has_bare_name_part = FALSE;
218           parens_from = NULL;
219         }
220     }
221
222 #ifndef MAIN
223     if (outlevel >= O_DEBUG) {
224         report_complete(stdout, GT_("...rewritten version is %s.\n"),
225                         (cp = sdump(buf, BEFORE_EOL(buf))));
226         xfree(cp)
227     }
228
229 #endif /* MAIN */
230     *length = strlen(buf);
231     return(buf);
232 }
233
char *nxtaddr(const char *hdr /* header to be parsed, NUL to continue previous hdr */)
/*
 * Parse addresses in succession out of a specified RFC822 header.
 *
 * Call with the header to get its first address, then with NULL to get
 * each following one.  Returns a pointer to a static buffer that the next
 * call overwrites, or NULL when the header has no more addresses.  The
 * parse position and state live in static variables, so only one header
 * can be walked at a time.  Addresses that do not fit the buffer are
 * clipped (see NEXTTP).
 */
{
    static char address[BUFSIZ];
    static size_t tp;
    static const char *hp;
    static int  state, oldstate;
#ifdef MAIN
    static const char *orighdr;
#endif /* MAIN */
    int parendepth = 0;

#define START_HDR       0       /* before header colon */
#define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
#define BARE_ADDRESS    2       /* collecting address without delimiters */
#define INSIDE_DQUOTE   3       /* inside double quotes */
#define INSIDE_PARENS   4       /* inside parentheses */
#define INSIDE_BRACKETS 5       /* inside bracketed address */
#define ENDIT_ALL       6       /* after last address */

/* index of the next free slot in address[]; sticks at the last slot when full */
#define NEXTTP()        ((tp < sizeof(address)-1) ? tp++ : tp)

    if (hdr)
    {
        /* new header: restart the state machine */
        hp = hdr;
        state = START_HDR;
#ifdef MAIN
        orighdr = hdr;
#endif /* MAIN */
        tp = 0;
    }

    if (!hp) return NULL;

    for (; *hp; hp++)
    {
#ifdef MAIN
        if (verbose)
        {
            printf("state %d: %s", state, orighdr);
            printf("%*s^\n", (int)(hp - orighdr + 10), " ");
        }
#endif /* MAIN */

        if (state == ENDIT_ALL)         /* after last address */
            return(NULL);
        else if (HEADER_END(hp))
        {
            state = ENDIT_ALL;
            if (tp)
            {
                /* hand back what was collected, minus trailing whitespace */
                while (tp > 0 && isspace((unsigned char)address[tp - 1]))
                    tp--;
                address[tp] = '\0';
                tp = 0;
                return (address);
            }
            return(NULL);
        }
        else if (*hp == '\\')           /* handle RFC822 escaping */
        {
            if (state != INSIDE_PARENS)
            {
                address[NEXTTP()] = *hp;        /* take the escape */
                /*
                 * Take the following character as well -- but only if
                 * there is one.  Stepping onto the terminating NUL here
                 * would let the loop increment run past the end of hdr.
                 */
                if (hp[1] != '\0')
                {
                    hp++;
                    address[NEXTTP()] = *hp;
                }
            }
        }
        else switch (state)
        {
        case START_HDR:   /* before header colon */
            if (*hp == ':')
                state = SKIP_JUNK;
            break;

        case SKIP_JUNK:         /* looking for address start */
            if (*hp == '"')     /* quoted string */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')        /* address comment -- ignore */
            {
                parendepth = 1;
                oldstate = SKIP_JUNK;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* begin <address> */
            {
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp != ',' && !isspace((unsigned char)*hp))
            {
                /* back up so BARE_ADDRESS sees this character again */
                --hp;
                state = BARE_ADDRESS;
            }
            break;

        case BARE_ADDRESS:      /* collecting address without delimiters */
            if (*hp == ',')     /* end of address */
            {
                if (tp)
                {
                    /* hp stays on the comma; SKIP_JUNK skips it next call */
                    address[NEXTTP()] = '\0';
                    state = SKIP_JUNK;
                    tp = 0;
                    return(address);
                }
            }
            else if (*hp == '(')        /* beginning of comment */
            {
                parendepth = 1;
                oldstate = BARE_ADDRESS;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* beginning of real address */
            {
                /* what was collected so far was a display name; drop it */
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp == '"')        /* quoted word, copy verbatim */
            {
                oldstate = state;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (!isspace((unsigned char)*hp))      /* just take it, ignoring whitespace */
                address[NEXTTP()] = *hp;
            break;

        case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
            address[NEXTTP()] = *hp;
            if (*hp == '"')
                state = oldstate;
            break;

        case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
            if (*hp == '(')
                ++parendepth;
            else if (*hp == ')')
                --parendepth;
            if (parendepth == 0)
                state = oldstate;
            break;

        case INSIDE_BRACKETS:   /* possible <>-enclosed address */
            if (*hp == '>')     /* end of address */
            {
                address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                ++hp;           /* the next call resumes after the '>' */
                tp = 0;
                return(address);
            }
            else if (*hp == '<')        /* nested <> */
                tp = 0;
            else if (*hp == '"')        /* quoted address */
            {
                address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else                        /* just copy address */
                address[NEXTTP()] = *hp;
            break;
        }
    }

    return(NULL);
}
405
406 #ifdef MAIN
static void parsebuf(char *longbuf, int reply)
/* demonstrator helper: rewrite one unfolded header, or list its addresses */
{
    char        *addr;
    size_t      unused_len;

    if (reply)
    {
        /* rewriting happens in place inside longbuf */
        reply_hack(longbuf, "HOSTNAME.NET", &unused_len);
        printf("Rewritten buffer: %s", longbuf);
        return;
    }

    /* first call hands over the header, later calls continue it */
    for (addr = nxtaddr(longbuf); addr != NULL; addr = nxtaddr(NULL))
        printf("\t-> \"%s\"\n", addr);
}
424
425
426
427 int main(int argc, char *argv[])
428 {
429     char        buf[BUFSIZ], longbuf[BUFSIZ];
430     int         ch, reply;
431     
432     verbose = reply = FALSE;
433     while ((ch = getopt(argc, argv, "rv")) != EOF)
434         switch(ch)
435         {
436         case 'r':
437             reply = TRUE;
438             break;
439
440         case 'v':
441             verbose = TRUE;
442             break;
443         }
444
445     longbuf[0] = '\0';
446
447     while (fgets(buf, sizeof(buf)-1, stdin))
448     {
449         if (buf[0] == ' ' || buf[0] == '\t')
450             strlcat(longbuf, buf, sizeof(longbuf));
451         else if (!strncasecmp("From: ", buf, 6)
452                     || !strncasecmp("To: ", buf, 4)
453                     || !strncasecmp("Reply-", buf, 6)
454                     || !strncasecmp("Cc: ", buf, 4)
455                     || !strncasecmp("Bcc: ", buf, 5))
456             strlcpy(longbuf, buf, sizeof(longbuf));
457         else if (longbuf[0])
458         {
459             if (verbose)
460                 fputs(longbuf, stdout);
461             parsebuf(longbuf, reply);
462             longbuf[0] = '\0';
463         }
464     }
465     if (longbuf[0])
466     {
467         if (verbose)
468             fputs(longbuf, stdout);
469         parsebuf(longbuf, reply);
470     }
471     exit(0);
472 }
473 #endif /* MAIN */
474
475 /* rfc822.c end */