]> Pileus Git - ~andy/fetchmail/blob - rfc822.c
Cast strcspn result to int.
[~andy/fetchmail] / rfc822.c
1 /*****************************************************************************
2
3 NAME:
4    rfc822.c -- code for slicing and dicing RFC822 mail headers
5
6 ENTRY POINTS:
7    nxtaddr() -- parse the next address out of an RFC822 header
8    reply_hack() -- append hostname to local header addresses 
9
10 THEORY:
11    How to parse RFC822 headers in C. This is not a fully conformant
12 implementation of RFC822 or RFC2822, but it has been in production use
13 in a widely-deployed MTA (fetchmail) since 1996 without complaints.
14 Really perverse combinations of quoting and commenting could break it.
15
16 AUTHOR:
17    Eric S. Raymond <esr@thyrsus.com>, 1997.  This source code example
18 is part of fetchmail and the Unix Cookbook, and is released under the
19 MIT license.  Compile with -DMAIN to build the demonstrator.
20
21 ******************************************************************************/
22 #include  <stdio.h>
23 #include  <ctype.h>
24 #include  <string.h>
25 #include  <stdlib.h>
26
27 #include "fetchmail.h"
28
29 #ifndef MAIN
30 #include "i18n.h"
31 #else
32 #include  <unistd.h>
33 static int verbose;
34 const char *program_name = "rfc822";
35 #endif /* MAIN */
36
37 #ifndef TRUE
38 #define TRUE 1
39 #define FALSE 0
40 #endif
41
/*
 * HEADER_END(p) -- true when p points at a newline that terminates a
 * header, i.e. a '\n' NOT followed by a space or tab (a newline followed
 * by whitespace is a folded continuation of the same header).
 * Evaluates p more than once; do not pass an expression with side effects.
 */
#define HEADER_END(p)   ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))

/*
 * BEFORE_EOL(s) -- number of characters (size_t) in s before the first
 * CR or LF, or the whole string length if there is none.  Callers cast
 * the result to int when using it as a "%.*s" precision.
 */
#define BEFORE_EOL(s)   (strcspn((s), "\r\n"))
45
46 char *reply_hack(
47         char *buf               /* header to be hacked */,
48         const char *host        /* server hostname */,
49         size_t *length)
50 /* hack message headers so replies will work properly */
51 {
52     char *from, *cp, last_nws = '\0', *parens_from = NULL;
53     int parendepth, state, has_bare_name_part, has_host_part;
54 #ifndef MAIN
55     int addresscount = 1;
56 #endif /* MAIN */
57
58     if (strncasecmp("From:", buf, 5)
59         && strncasecmp("To:", buf, 3)
60         && strncasecmp("Reply-To:", buf, 9)
61         && strncasecmp("Return-Path:", buf, 12)
62         && strncasecmp("Cc:", buf, 3)
63         && strncasecmp("Bcc:", buf, 4)
64         && strncasecmp("Resent-From:", buf, 12)
65         && strncasecmp("Resent-To:", buf, 10)
66         && strncasecmp("Resent-Cc:", buf, 10)
67         && strncasecmp("Resent-Bcc:", buf, 11)
68         && strncasecmp("Apparently-From:", buf, 16)
69         && strncasecmp("Apparently-To:", buf, 14)
70         && strncasecmp("Sender:", buf, 7)
71         && strncasecmp("Resent-Sender:", buf, 14)
72        ) {
73         return(buf);
74     }
75
76 #ifndef MAIN
77     if (outlevel >= O_DEBUG)
78         report_build(stdout, GT_("About to rewrite %.*s...\n"),
79                         (int)BEFORE_EOL(buf), buf);
80
81     /* make room to hack the address; buf must be malloced */
82     for (cp = buf; *cp; cp++)
83         if (*cp == ',' || isspace((unsigned char)*cp))
84             addresscount++;
85     buf = (char *)xrealloc(buf, strlen(buf) + addresscount * (strlen(host) + 1) + 1);
86 #endif /* MAIN */
87
88     /*
89      * This is going to foo up on some ill-formed addresses.
90      * Note that we don't rewrite the fake address <> in order to
91      * avoid screwing up bounce suppression with a null Return-Path.
92      */
93
94     parendepth = state = 0;
95     has_host_part = has_bare_name_part = FALSE;
96     for (from = buf; *from; from++)
97     {
98 #ifdef MAIN
99         if (verbose)
100         {
101             printf("state %d: %s", state, buf);
102             printf("%*s^\n", (int)(from - buf + 10), " ");
103         }
104 #endif /* MAIN */
105         if (state != 2)
106         {
107             if (*from == '(')
108                 ++parendepth;
109             else if (*from == ')')
110                 --parendepth;
111         }
112
113         if (!parendepth && !has_host_part)
114             switch (state)
115             {
116             case 0:     /* before header colon */
117                 if (*from == ':')
118                     state = 1;
119                 break;
120
121             case 1:     /* we've seen the colon, we're looking for addresses */
122                 if (!isspace((unsigned char)*from))
123                     last_nws = *from;
124                 if (*from == '<')
125                     state = 3;
126                 else if (*from == '@' || *from == '!')
127                     has_host_part = TRUE;
128                 else if (*from == '"')
129                     state = 2;
130                 /*
131                  * Not expanding on last non-WS == ';' deals with groupnames,
132                  * an obscure misfeature described in sections
133                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
134                  */
135                 else if ((*from == ',' || HEADER_END(from))
136                          && has_bare_name_part
137                          && !has_host_part
138                          && last_nws != ';')
139                 {
140                     int hostlen;
141                     char *p;
142
143                     p = from;
144                     if (parens_from)
145                         from = parens_from;
146                     while (isspace((unsigned char)*from) || (*from == ','))
147                         --from;
148                     from++;
149                     hostlen = strlen(host);
150                     for (cp = from + strlen(from); cp >= from; --cp)
151                         cp[hostlen+1] = *cp;
152                     *from++ = '@';
153                     memcpy(from, host, hostlen);
154                     from = p + hostlen + 1;
155                     has_host_part = TRUE;
156                 } 
157                 else if (from[1] == '('
158                          && has_bare_name_part
159                          && !has_host_part
160                          && last_nws != ';' && last_nws != ')')
161                 {
162                     parens_from = from;
163                 } 
164                 else if (!isspace((unsigned char)*from))
165                     has_bare_name_part = TRUE;
166                 break;
167
168             case 2:     /* we're in a string */
169                 if (*from == '"')
170                 {
171                     char        *bp;
172                     int         bscount;
173
174                     bscount = 0;
175                     for (bp = from - 1; *bp == '\\'; bp--)
176                         bscount++;
177                     if (!(bscount % 2))
178                         state = 1;
179                 }
180                 break;
181
182             case 3:     /* we're in a <>-enclosed address */
183                 if (*from == '@' || *from == '!')
184                     has_host_part = TRUE;
185                 else if (*from == '>' && (from > buf && from[-1] != '<'))
186                 {
187                     state = 1;
188                     if (!has_host_part)
189                     {
190                         int hostlen;
191
192                         hostlen = strlen(host);
193                         for (cp = from + strlen(from); cp >= from; --cp)
194                             cp[hostlen+1] = *cp;
195                         *from++ = '@';
196                         memcpy(from, host, hostlen);
197                         from += hostlen;
198                         has_host_part = TRUE;
199                     }
200                 }
201                 break;
202             }
203
204         /*
205          * If we passed a comma, reset everything.
206          */
207         if ((from > buf && from[-1] == ',') && !parendepth) {
208           has_host_part = has_bare_name_part = FALSE;
209           parens_from = NULL;
210         }
211     }
212
213 #ifndef MAIN
214     if (outlevel >= O_DEBUG)
215         report_complete(stdout, GT_("...rewritten version is %.*s.\n"),
216                         (int)BEFORE_EOL(buf), buf);
217 #endif /* MAIN */
218     *length = strlen(buf);
219     return(buf);
220 }
221
/*
 * nxtaddr -- iterate over the addresses in one RFC822 header.
 *
 * Call once with the header to start, then repeatedly with NULL to get
 * the following addresses.  Returns a pointer to a static buffer holding
 * the next address (overwritten by the next call), or NULL when there
 * are no more addresses or no header has been supplied yet.
 *
 * The parse position and state are kept in static variables, so only one
 * header can be walked at a time.  An address longer than the static
 * buffer is truncated (see NEXTTP).  A bare address is only returned
 * when it is followed by ',' or by a header-ending newline.
 */
char *nxtaddr(const char *hdr /* header to be parsed, NULL to continue previous hdr */)
/* parse addresses in succession out of a specified RFC822 header */
{
    static char address[BUFSIZ];
    static size_t tp;
    static const char *hp;
    static int  state, oldstate;
#ifdef MAIN
    static const char *orighdr;
#endif /* MAIN */
    int parendepth = 0;

#define START_HDR       0       /* before header colon */
#define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
#define BARE_ADDRESS    2       /* collecting address without delimiters */
#define INSIDE_DQUOTE   3       /* inside double quotes */
#define INSIDE_PARENS   4       /* inside parentheses */
#define INSIDE_BRACKETS 5       /* inside bracketed address */
#define ENDIT_ALL       6       /* after last address */

/* index of the next free slot in address[]; stops advancing at the last
 * slot so an over-long address cannot overrun the buffer */
#define NEXTTP()        ((tp < sizeof(address)-1) ? tp++ : tp)

    if (hdr)
    {
        hp = hdr;
        state = START_HDR;
#ifdef MAIN
        orighdr = hdr;
#endif /* MAIN */
        tp = 0;
    }

    if (!hp) return NULL;

    for (; *hp; hp++)
    {
#ifdef MAIN
        if (verbose)
        {
            printf("state %d: %s", state, orighdr);
            printf("%*s^\n", (int)(hp - orighdr + 10), " ");
        }
#endif /* MAIN */

        if (state == ENDIT_ALL)         /* after last address */
            return(NULL);
        else if (HEADER_END(hp))
        {
            state = ENDIT_ALL;
            if (tp)
            {
                /* strip trailing whitespace from the final address */
                while (tp > 0 && isspace((unsigned char)address[tp - 1]))
                    tp--;
                address[tp] = '\0';
                tp = 0;
                return (address);
            }
            return(NULL);
        }
        else if (*hp == '\\')           /* handle RFC822 escaping */
        {
            if (state != INSIDE_PARENS)
            {
                address[NEXTTP()] = *hp;        /* take the escape */
                /*
                 * Only consume the escaped character if there is one:
                 * stepping onto the terminating NUL here would let the
                 * loop increment walk past the end of the header.
                 */
                if (hp[1] != '\0')
                {
                    ++hp;
                    address[NEXTTP()] = *hp;    /* take following unsigned char */
                }
            }
        }
        else switch (state)
        {
        case START_HDR:   /* before header colon */
            if (*hp == ':')
                state = SKIP_JUNK;
            break;

        case SKIP_JUNK:         /* looking for address start */
            if (*hp == '"')     /* quoted string */
            {
                oldstate = SKIP_JUNK;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (*hp == '(')        /* address comment -- ignore */
            {
                parendepth = 1;
                oldstate = SKIP_JUNK;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* begin <address> */
            {
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp != ',' && !isspace((unsigned char)*hp))
            {
                /* first character of a bare address: back up so the
                 * BARE_ADDRESS case sees it on the next iteration */
                --hp;
                state = BARE_ADDRESS;
            }
            break;

        case BARE_ADDRESS:      /* collecting address without delimiters */
            if (*hp == ',')     /* end of address */
            {
                if (tp)
                {
                    address[NEXTTP()] = '\0';
                    state = SKIP_JUNK;
                    tp = 0;
                    return(address);
                }
            }
            else if (*hp == '(')        /* beginning of comment */
            {
                parendepth = 1;
                oldstate = BARE_ADDRESS;
                state = INSIDE_PARENS;
            }
            else if (*hp == '<')        /* beginning of real address */
            {
                /* what was collected so far was a display name; drop it */
                state = INSIDE_BRACKETS;
                tp = 0;
            }
            else if (*hp == '"')        /* quoted word, copy verbatim */
            {
                oldstate = state;
                state = INSIDE_DQUOTE;
                address[NEXTTP()] = *hp;
            }
            else if (!isspace((unsigned char)*hp))      /* just take it, ignoring whitespace */
                address[NEXTTP()] = *hp;
            break;

        case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
            address[NEXTTP()] = *hp;
            if (*hp == '"')
                state = oldstate;
            break;

        case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
            if (*hp == '(')
                ++parendepth;
            else if (*hp == ')')
                --parendepth;
            if (parendepth == 0)
                state = oldstate;
            break;

        case INSIDE_BRACKETS:   /* possible <>-enclosed address */
            if (*hp == '>')     /* end of address */
            {
                address[NEXTTP()] = '\0';
                state = SKIP_JUNK;
                ++hp;           /* the next call resumes after the '>' */
                tp = 0;
                return(address);
            }
            else if (*hp == '<')        /* nested <> */
                tp = 0;
            else if (*hp == '"')        /* quoted address */
            {
                address[NEXTTP()] = *hp;
                oldstate = INSIDE_BRACKETS;
                state = INSIDE_DQUOTE;
            }
            else                        /* just copy address */
                address[NEXTTP()] = *hp;
            break;

        default:
            break;
        }
    }

    return(NULL);
}
393
394 #ifdef MAIN
/*
 * Demonstrator helper: run one complete header through either
 * reply_hack() (when reply is nonzero) or the nxtaddr() iterator,
 * printing the result to stdout.
 */
static void parsebuf(char *longbuf, int reply)
{
    char        *addr;
    size_t      unused_len;     /* reply_hack() wants a length out-parameter */

    if (reply)
    {
        reply_hack(longbuf, "HOSTNAME.NET", &unused_len);
        printf("Rewritten buffer: %s", longbuf);
        return;
    }

    /* first call passes the header, later calls pass NULL to continue */
    addr = nxtaddr(longbuf);
    while (addr != NULL)
    {
        printf("\t-> \"%s\"\n", addr);
        addr = nxtaddr(NULL);
    }
}
412
413
414
415 int main(int argc, char *argv[])
416 {
417     char        buf[BUFSIZ], longbuf[BUFSIZ];
418     int         ch, reply;
419     
420     verbose = reply = FALSE;
421     while ((ch = getopt(argc, argv, "rv")) != EOF)
422         switch(ch)
423         {
424         case 'r':
425             reply = TRUE;
426             break;
427
428         case 'v':
429             verbose = TRUE;
430             break;
431         }
432
433     while (fgets(buf, sizeof(buf)-1, stdin))
434     {
435         if (buf[0] == ' ' || buf[0] == '\t')
436             strlcat(longbuf, buf, sizeof(longbuf));
437         else if (!strncasecmp("From: ", buf, 6)
438                     || !strncasecmp("To: ", buf, 4)
439                     || !strncasecmp("Reply-", buf, 6)
440                     || !strncasecmp("Cc: ", buf, 4)
441                     || !strncasecmp("Bcc: ", buf, 5))
442             strlcpy(longbuf, buf, sizeof(longbuf));
443         else if (longbuf[0])
444         {
445             if (verbose)
446                 fputs(longbuf, stdout);
447             parsebuf(longbuf, reply);
448             longbuf[0] = '\0';
449         }
450     }
451     if (longbuf[0])
452     {
453         if (verbose)
454             fputs(longbuf, stdout);
455         parsebuf(longbuf, reply);
456     }
457     exit(0);
458 }
459 #endif /* MAIN */
460
461 /* rfc822.c end */