Pileus Git - ~andy/fetchmail/blob - rfc822.c
Ready to ship.
[~andy/fetchmail] / rfc822.c
1 /*
2  * rfc822.c -- code for slicing and dicing RFC822 mail headers
3  *
4  * Copyright 1997 by Eric S. Raymond
5  * For license terms, see the file COPYING in this directory.
6  */
7
8 #include  <stdio.h>
9 #include  <ctype.h>
10 #include  <string.h>
11 #if defined(STDC_HEADERS)
12 #include  <stdlib.h>
13 #endif
14
15 #include "config.h"
16 #include "fetchmail.h"
17 #include "i18n.h"
18
19 #define HEADER_END(p)   ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t' && (p)[1] != '\0'))
20
21 #ifdef TESTMAIN
22 static int verbose;
23 char *program_name = "rfc822";
24 #endif /* TESTMAIN */
25
26 unsigned char *reply_hack(buf, host)
27 /* hack message headers so replies will work properly */
28 unsigned char *buf;             /* header to be hacked */
29 const unsigned char *host;      /* server hostname */
30 {
31     unsigned char *from, *cp, last_nws = '\0', *parens_from = NULL;
32     int parendepth, state, has_bare_name_part, has_host_part;
33 #ifndef TESTMAIN
34     int addresscount = 1;
35 #endif /* TESTMAIN */
36
37     if (strncasecmp("From:", buf, 5)
38         && strncasecmp("To:", buf, 3)
39         && strncasecmp("Reply-To:", buf, 9)
40         && strncasecmp("Return-Path:", buf, 12)
41         && strncasecmp("Cc:", buf, 3)
42         && strncasecmp("Bcc:", buf, 4)
43         && strncasecmp("Resent-From:", buf, 12)
44         && strncasecmp("Resent-To:", buf, 10)
45         && strncasecmp("Resent-Cc:", buf, 10)
46         && strncasecmp("Resent-Bcc:", buf, 11)
47         && strncasecmp("Apparently-From:", buf, 16)
48         && strncasecmp("Apparently-To:", buf, 14)
49         && strncasecmp("Sender:", buf, 7)
50         && strncasecmp("Resent-Sender:", buf, 14)
51        ) {
52         return(buf);
53     }
54
55 #ifndef TESTMAIN
56     if (outlevel >= O_DEBUG)
57         report_build(stdout, _("About to rewrite %s"), buf);
58
59     /* make room to hack the address; buf must be malloced */
60     for (cp = buf; *cp; cp++)
61         if (*cp == ',' || isspace(*cp))
62             addresscount++;
63     buf = (unsigned char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
64 #endif /* TESTMAIN */
65
66     /*
67      * This is going to foo up on some ill-formed addresses.
68      * Note that we don't rewrite the fake address <> in order to
69      * avoid screwing up bounce suppression with a null Return-Path.
70      */
71
72     parendepth = state = 0;
73     has_host_part = has_bare_name_part = FALSE;
74     for (from = buf; *from; from++)
75     {
76 #ifdef TESTMAIN
77         if (verbose)
78         {
79             printf("state %d: %s", state, buf);
80             printf("%*s^\n", from - buf + 10, " ");
81         }
82 #endif /* TESTMAIN */
83         if (state != 2)
84         {
85             if (*from == '(')
86                 ++parendepth;
87             else if (*from == ')')
88                 --parendepth;
89         }
90
91         if (!parendepth && !has_host_part)
92             switch (state)
93             {
94             case 0:     /* before header colon */
95                 if (*from == ':')
96                     state = 1;
97                 break;
98
99             case 1:     /* we've seen the colon, we're looking for addresses */
100                 if (!isspace(*from))
101                     last_nws = *from;
102                 if (*from == '<')
103                     state = 3;
104                 else if (*from == '@')
105                     has_host_part = TRUE;
106                 else if (*from == '"')
107                     state = 2;
108                 /*
109                  * Not expanding on last non-WS == ';' deals with groupnames,
110                  * an obscure misfeature described in sections
111                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
112                  */
113                 else if ((*from == ',' || HEADER_END(from))
114                          && has_bare_name_part
115                          && !has_host_part
116                          && last_nws != ';')
117                 {
118                     int hostlen;
119                     unsigned char *p;
120
121                     p = from;
122                     if (parens_from)
123                         from = parens_from;
124                     while (isspace(*from) || (*from == ','))
125                         --from;
126                     from++;
127                     hostlen = strlen(host);
128                     for (cp = from + strlen(from); cp >= from; --cp)
129                         cp[hostlen+1] = *cp;
130                     *from++ = '@';
131                     memcpy(from, host, hostlen);
132                     from = p + hostlen + 1;
133                     has_host_part = TRUE;
134                 } 
135                 else if (from[1] == '('
136                          && has_bare_name_part
137                          && !has_host_part
138                          && last_nws != ';' && last_nws != ')')
139                 {
140                     parens_from = from;
141                 } 
142                 else if (!isspace(*from))
143                     has_bare_name_part = TRUE;
144                 break;
145
146             case 2:     /* we're in a string */
147                 if (*from == '"')
148                     state = 1;
149                 break;
150
151             case 3:     /* we're in a <>-enclosed address */
152                 if (*from == '@')
153                     has_host_part = TRUE;
154                 else if (*from == '>' && from[-1] != '<')
155                 {
156                     state = 1;
157                     if (!has_host_part)
158                     {
159                         int hostlen;
160
161                         hostlen = strlen(host);
162                         for (cp = from + strlen(from); cp >= from; --cp)
163                             cp[hostlen+1] = *cp;
164                         *from++ = '@';
165                         memcpy(from, host, hostlen);
166                         from += hostlen;
167                         has_host_part = TRUE;
168                     }
169                 }
170                 break;
171             }
172
173         /*
174          * If we passed a comma, reset everything.
175          */
176         if (from[-1] == ',' && !parendepth) {
177           has_host_part = has_bare_name_part = FALSE;
178           parens_from = NULL;
179         }
180     }
181
182 #ifndef TESTMAIN
183     if (outlevel >= O_DEBUG)
184         report_complete(stdout, _("Rewritten version is %s\n"), buf);
185 #endif /* TESTMAIN */
186     return(buf);
187 }
188
189 unsigned char *nxtaddr(hdr)
190 /* parse addresses in succession out of a specified RFC822 header */
191 const unsigned char *hdr;       /* header to be parsed, NUL to continue previous hdr */
192 {
193     static unsigned char address[POPBUFSIZE+1];
194     static int tp;
195     static const unsigned char *hp;
196     static int  state, oldstate;
197 #ifdef TESTMAIN
198     static const unsigned char *orighdr;
199 #endif /* TESTMAIN */
200     int parendepth = 0;
201
202 #define START_HDR       0       /* before header colon */
203 #define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
204 #define BARE_ADDRESS    2       /* collecting address without delimiters */
205 #define INSIDE_DQUOTE   3       /* inside double quotes */
206 #define INSIDE_PARENS   4       /* inside parentheses */
207 #define INSIDE_BRACKETS 5       /* inside bracketed address */
208 #define ENDIT_ALL       6       /* after last address */
209
210 #define NEXTTP()        ((tp < sizeof(address)-1) ? tp++ : tp)
211
212     if (hdr)
213     {
214         hp = hdr;
215         state = START_HDR;
216 #ifdef TESTMAIN
217         orighdr = hdr;
218 #endif /* TESTMAIN */
219         tp = 0;
220     }
221
222     for (; *hp; hp++)
223     {
224 #ifdef TESTMAIN
225         if (verbose)
226         {
227             printf("state %d: %s", state, orighdr);
228             printf("%*s^\n", hp - orighdr + 10, " ");
229         }
230 #endif /* TESTMAIN */
231
232         if (state == ENDIT_ALL)         /* after last address */
233             return(NULL);
234         else if (HEADER_END(hp))
235         {
236             state = ENDIT_ALL;
237             if (tp)
238             {
239                 while (isspace(address[--tp]))
240                     continue;
241                 address[++tp] = '\0';
242                 tp = 0;
243                 return (address);
244             }
245             return((unsigned char *)NULL);
246         }
247         else if (*hp == '\\')           /* handle RFC822 escaping */
248         {
249             if (state != INSIDE_PARENS)
250             {
251                 address[NEXTTP()] = *hp++;      /* take the escape */
252                 address[NEXTTP()] = *hp;        /* take following unsigned char */
253             }
254         }
255         else switch (state)
256         {
257         case START_HDR:   /* before header colon */
258             if (*hp == ':')
259                 state = SKIP_JUNK;
260             break;
261
262         case SKIP_JUNK:         /* looking for address start */
263             if (*hp == '"')     /* quoted string */
264             {
265                 oldstate = SKIP_JUNK;
266                 state = INSIDE_DQUOTE;
267                 address[NEXTTP()] = *hp;
268             }
269             else if (*hp == '(')        /* address comment -- ignore */
270             {
271                 parendepth = 1;
272                 oldstate = SKIP_JUNK;
273                 state = INSIDE_PARENS;    
274             }
275             else if (*hp == '<')        /* begin <address> */
276             {
277                 state = INSIDE_BRACKETS;
278                 tp = 0;
279             }
280             else if (*hp != ',' && !isspace(*hp))
281             {
282                 --hp;
283                 state = BARE_ADDRESS;
284             }
285             break;
286
287         case BARE_ADDRESS:      /* collecting address without delimiters */
288             if (*hp == ',')     /* end of address */
289             {
290                 if (tp)
291                 {
292                     address[NEXTTP()] = '\0';
293                     state = SKIP_JUNK;
294                     tp = 0;
295                     return(address);
296                 }
297             }
298             else if (*hp == '(')        /* beginning of comment */
299             {
300                 parendepth = 1;
301                 oldstate = BARE_ADDRESS;
302                 state = INSIDE_PARENS;    
303             }
304             else if (*hp == '<')        /* beginning of real address */
305             {
306                 state = INSIDE_BRACKETS;
307                 tp = 0;
308             }
309             else if (!isspace(*hp))     /* just take it, ignoring whitespace */
310                 address[NEXTTP()] = *hp;
311             break;
312
313         case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
314             if (*hp != '"')
315                 address[NEXTTP()] = *hp;
316             else
317             {
318                 address[NEXTTP()] = *hp;
319                 state = oldstate;
320             }
321             break;
322
323         case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
324             if (*hp == '(')
325                 ++parendepth;
326             else if (*hp == ')')
327                 --parendepth;
328             if (parendepth == 0)
329                 state = oldstate;
330             break;
331
332         case INSIDE_BRACKETS:   /* possible <>-enclosed address */
333             if (*hp == '>')     /* end of address */
334             {
335                 address[NEXTTP()] = '\0';
336                 state = SKIP_JUNK;
337                 ++hp;
338                 tp = 0;
339                 return(address);
340             }
341             else if (*hp == '<')        /* nested <> */
342                 tp = 0;
343             else if (*hp == '"')        /* quoted address */
344             {
345                 address[NEXTTP()] = *hp;
346                 oldstate = INSIDE_BRACKETS;
347                 state = INSIDE_DQUOTE;
348             }
349             else                        /* just copy address */
350                 address[NEXTTP()] = *hp;
351             break;
352         }
353     }
354
355     return(NULL);
356 }
357
358 #ifdef TESTMAIN
/*
 * Test-driver helper: push one collected header through the code under
 * test.  With `reply` nonzero the header is rewritten in place by
 * reply_hack() and printed; otherwise every address nxtaddr() extracts
 * from it is printed on its own line.
 */
static void parsebuf(unsigned char *longbuf, int reply)
{
    unsigned char *addr;

    if (reply)
    {
        reply_hack(longbuf, (const unsigned char *)"HOSTNAME.NET");
        printf("Rewritten buffer: %s", longbuf);
        return;
    }

    /* first call hands over the header, later calls continue the scan */
    for (addr = nxtaddr(longbuf);
         addr != (unsigned char *)NULL;
         addr = nxtaddr((unsigned char *)NULL))
    {
        printf("\t-> \"%s\"\n", addr);
    }
}
375
376
377
378 main(int argc, char *argv[])
379 {
380     unsigned char       buf[MSGBUFSIZE], longbuf[BUFSIZ];
381     int                 ch, reply;
382     
383     verbose = reply = FALSE;
384     while ((ch = getopt(argc, argv, "rv")) != EOF)
385         switch(ch)
386         {
387         case 'r':
388             reply = TRUE;
389             break;
390
391         case 'v':
392             verbose = TRUE;
393             break;
394         }
395
396     while (fgets(buf, sizeof(buf)-1, stdin))
397     {
398         if (buf[0] == ' ' || buf[0] == '\t')
399             strcat(longbuf, buf);
400         else if (!strncasecmp("From: ", buf, 6)
401                     || !strncasecmp("To: ", buf, 4)
402                     || !strncasecmp("Reply-", buf, 6)
403                     || !strncasecmp("Cc: ", buf, 4)
404                     || !strncasecmp("Bcc: ", buf, 5))
405             strcpy(longbuf, buf);       
406         else if (longbuf[0])
407         {
408             if (verbose)
409                 fputs(longbuf, stdout);
410             parsebuf(longbuf, reply);
411             longbuf[0] = '\0';
412         }
413     }
414     if (longbuf[0])
415     {
416         if (verbose)
417             fputs(longbuf, stdout);
418         parsebuf(longbuf, reply);
419     }
420 }
421 #endif /* TESTMAIN */
422
423 /* rfc822.c end */