]> Pileus Git - ~andy/fetchmail/blob - rfc822.c
8e0e4a3e62a92b60963a0570c745841daedd0b6e
[~andy/fetchmail] / rfc822.c
1 /*
2  * rfc822.c -- code for slicing and dicing RFC822 mail headers
3  *
4  * Copyright 1997 by Eric S. Raymond
5  * For license terms, see the file COPYING in this directory.
6  */
7
8 #include  <stdio.h>
9 #include  <ctype.h>
10 #include  <string.h>
11 #if defined(STDC_HEADERS)
12 #include  <stdlib.h>
13 #endif
14
15 #include "config.h"
16 #include "fetchmail.h"
17
/*
 * True when p points at a newline that is NOT followed by a space or a tab,
 * i.e. a newline that is not the start of a folded continuation line.
 * Note that p is evaluated more than once.
 */
#define HEADER_END(p)   (*(p) == '\n' && !((p)[1] == ' ' || (p)[1] == '\t'))

#ifdef TESTMAIN
/* nonzero makes the parsers below print a state trace for every character */
static int verbose;
/* NOTE(review): presumably read by shared code declared elsewhere -- confirm */
char *program_name = "rfc822";
#endif /* TESTMAIN */
24
25 char *reply_hack(buf, host)
26 /* hack message headers so replies will work properly */
27 char *buf;              /* header to be hacked */
28 const char *host;       /* server hostname */
29 {
30     char *from, *cp, last_nws = '\0', *parens_from = NULL;
31     int parendepth, state, has_bare_name_part, has_host_part;
32 #ifndef TESTMAIN
33     int addresscount = 1;
34 #endif /* TESTMAIN */
35
36     if (strncasecmp("From: ", buf, 6)
37         && strncasecmp("To: ", buf, 4)
38         && strncasecmp("Reply-To: ", buf, 10)
39         && strncasecmp("Return-Path: ", buf, 13)
40         && strncasecmp("Cc: ", buf, 4)
41         && strncasecmp("Bcc: ", buf, 5)
42         && strncasecmp("Resent-From: ", buf, 13)
43         && strncasecmp("Resent-To: ", buf, 11)
44         && strncasecmp("Resent-Cc: ", buf, 11)
45         && strncasecmp("Resent-Bcc: ", buf, 12)
46         && strncasecmp("Apparently-From:", buf, 16)
47         && strncasecmp("Apparently-To:", buf, 14)
48         && strncasecmp("Sender:", buf, 7)
49         && strncasecmp("Resent-Sender:", buf, 14)
50        ) {
51         return(buf);
52     }
53
54 #ifndef TESTMAIN
55     if (outlevel >= O_DEBUG)
56         report_build(stdout, "About to rewrite %s", buf);
57
58     /* make room to hack the address; buf must be malloced */
59     for (cp = buf; *cp; cp++)
60         if (*cp == ',' || isspace(*cp))
61             addresscount++;
62     buf = (char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
63 #endif /* TESTMAIN */
64
65     /*
66      * This is going to foo up on some ill-formed addresses.
67      * Note that we don't rewrite the fake address <> in order to
68      * avoid screwing up bounce suppression with a null Return-Path.
69      */
70
71     parendepth = state = 0;
72     has_host_part = has_bare_name_part = FALSE;
73     for (from = buf; *from; from++)
74     {
75 #ifdef TESTMAIN
76         if (verbose)
77         {
78             printf("state %d: %s", state, buf);
79             printf("%*s^\n", from - buf + 10, " ");
80         }
81 #endif /* TESTMAIN */
82         if (state != 2)
83             if (*from == '(')
84                 ++parendepth;
85             else if (*from == ')')
86                 --parendepth;
87
88         if (!parendepth && !has_host_part)
89             switch (state)
90             {
91             case 0:     /* before header colon */
92                 if (*from == ':')
93                     state = 1;
94                 break;
95
96             case 1:     /* we've seen the colon, we're looking for addresses */
97                 if (!isspace(*from))
98                     last_nws = *from;
99                 if (*from == '<')
100                     state = 3;
101                 else if (*from == '@')
102                     has_host_part = TRUE;
103                 else if (*from == '"')
104                     state = 2;
105                 /*
106                  * Not expanding on last non-WS == ';' deals with groupnames,
107                  * an obscure misfeature described in sections
108                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
109                  */
110                 else if ((*from == ',' || HEADER_END(from))
111                          && has_bare_name_part
112                          && !has_host_part
113                          && last_nws != ';')
114                 {
115                     int hostlen;
116                     char *p;
117
118                     p = from;
119                     if (parens_from)
120                         from = parens_from;
121                     while (isspace(*from) || (*from == ','))
122                         --from;
123                     from++;
124                     hostlen = strlen(host);
125                     for (cp = from + strlen(from); cp >= from; --cp)
126                         cp[hostlen+1] = *cp;
127                     *from++ = '@';
128                     memcpy(from, host, hostlen);
129                     from = p + hostlen + 1;
130                     has_host_part = TRUE;
131                 } 
132                 else if (from[1] == '('
133                          && has_bare_name_part
134                          && !has_host_part
135                          && last_nws != ';' && last_nws != ')')
136                 {
137                     parens_from = from;
138                 } 
139                 else if (!isspace(*from))
140                     has_bare_name_part = TRUE;
141                 break;
142
143             case 2:     /* we're in a string */
144                 if (*from == '"')
145                     state = 1;
146                 break;
147
148             case 3:     /* we're in a <>-enclosed address */
149                 if (*from == '@')
150                     has_host_part = TRUE;
151                 else if (*from == '>' && from[-1] != '<')
152                 {
153                     state = 1;
154                     if (!has_host_part)
155                     {
156                         int hostlen;
157
158                         hostlen = strlen(host);
159                         for (cp = from + strlen(from); cp >= from; --cp)
160                             cp[hostlen+1] = *cp;
161                         *from++ = '@';
162                         memcpy(from, host, hostlen);
163                         from += hostlen;
164                         has_host_part = TRUE;
165                     }
166                 }
167                 break;
168             }
169
170         /*
171          * If we passed a comma, reset everything.
172          */
173         if (from[-1] == ',' && !parendepth) {
174           has_host_part = has_bare_name_part = FALSE;
175           parens_from = NULL;
176         }
177     }
178
179 #ifndef TESTMAIN
180     if (outlevel >= O_DEBUG)
181         report_complete(stdout, 0, "Rewritten version is %s\n", buf);
182 #endif /* TESTMAIN */
183     return(buf);
184 }
185
186 char *nxtaddr(hdr)
187 /* parse addresses in succession out of a specified RFC822 header */
188 const char *hdr;        /* header to be parsed, NUL to continue previous hdr */
189 {
190     static char *tp, address[POPBUFSIZE+1];
191     static const char *hp;
192     static int  state, oldstate;
193 #ifdef TESTMAIN
194     static const char *orighdr;
195 #endif /* TESTMAIN */
196     int parendepth = 0;
197
198 #define START_HDR       0       /* before header colon */
199 #define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
200 #define BARE_ADDRESS    2       /* collecting address without delimiters */
201 #define INSIDE_DQUOTE   3       /* inside double quotes */
202 #define INSIDE_PARENS   4       /* inside parentheses */
203 #define INSIDE_BRACKETS 5       /* inside bracketed address */
204 #define ENDIT_ALL       6       /* after last address */
205
206     if (hdr)
207     {
208         hp = hdr;
209         state = START_HDR;
210 #ifdef TESTMAIN
211         orighdr = hdr;
212 #endif /* TESTMAIN */
213         tp = address;
214     }
215
216     for (; *hp; hp++)
217     {
218 #ifdef TESTMAIN
219         if (verbose)
220         {
221             printf("state %d: %s", state, orighdr);
222             printf("%*s^\n", hp - orighdr + 10, " ");
223         }
224 #endif /* TESTMAIN */
225
226         if (state == ENDIT_ALL)         /* after last address */
227             return(NULL);
228         else if (HEADER_END(hp))
229         {
230             state = ENDIT_ALL;
231             if (tp > address)
232             {
233                 while (isspace(*--tp))
234                     continue;
235                 *++tp = '\0';
236             }
237             return(tp > address ? (tp = address) : (char *)NULL);
238         }
239         else if (*hp == '\\')           /* handle RFC822 escaping */
240         {
241             if (state != INSIDE_PARENS)
242             {
243                 *tp++ = *hp++;                  /* take the escape */
244                 *tp++ = *hp;                    /* take following char */
245             }
246         }
247         else switch (state)
248         {
249         case START_HDR:   /* before header colon */
250             if (*hp == ':')
251                 state = SKIP_JUNK;
252             break;
253
254         case SKIP_JUNK:         /* looking for address start */
255             if (*hp == '"')     /* quoted string */
256             {
257                 oldstate = SKIP_JUNK;
258                 state = INSIDE_DQUOTE;
259                 *tp++ = *hp;
260             }
261             else if (*hp == '(')        /* address comment -- ignore */
262             {
263                 parendepth = 1;
264                 oldstate = SKIP_JUNK;
265                 state = INSIDE_PARENS;    
266             }
267             else if (*hp == '<')        /* begin <address> */
268             {
269                 state = INSIDE_BRACKETS;
270                 tp = address;
271             }
272             else if (*hp != ',' && !isspace(*hp))
273             {
274                 --hp;
275                 state = BARE_ADDRESS;
276             }
277             break;
278
279         case BARE_ADDRESS:      /* collecting address without delimiters */
280             if (*hp == ',')     /* end of address */
281             {
282                 if (tp > address)
283                 {
284                     *tp++ = '\0';
285                     state = SKIP_JUNK;
286                     return(tp = address);
287                 }
288             }
289             else if (*hp == '(')        /* beginning of comment */
290             {
291                 parendepth = 1;
292                 oldstate = BARE_ADDRESS;
293                 state = INSIDE_PARENS;    
294             }
295             else if (*hp == '<')        /* beginning of real address */
296             {
297                 state = INSIDE_BRACKETS;
298                 tp = address;
299             }
300             else if (!isspace(*hp))     /* just take it, ignoring whitespace */
301                 *tp++ = *hp;
302             break;
303
304         case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
305             if (*hp != '"')
306                 *tp++ = *hp;
307             else
308             {
309                 *tp++ = *hp;
310                 state = oldstate;
311             }
312             break;
313
314         case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
315             if (*hp == '(')
316                 ++parendepth;
317             else if (*hp == ')')
318                 --parendepth;
319             if (parendepth == 0)
320                 state = oldstate;
321             break;
322
323         case INSIDE_BRACKETS:   /* possible <>-enclosed address */
324             if (*hp == '>')     /* end of address */
325             {
326                 *tp++ = '\0';
327                 state = SKIP_JUNK;
328                 ++hp;
329                 return(tp = address);
330             }
331             else if (*hp == '<')        /* nested <> */
332                 tp = address;
333             else if (*hp == '"')        /* quoted address */
334             {
335                 *tp++ = *hp;
336                 oldstate = INSIDE_BRACKETS;
337                 state = INSIDE_DQUOTE;
338             }
339             else                        /* just copy address */
340                 *tp++ = *hp;
341             break;
342         }
343     }
344
345     return(NULL);
346 }
347
348 #ifdef TESTMAIN
/*
 * parsebuf -- run one collected header through the code under test
 *
 * longbuf  the header text
 * reply    nonzero: rewrite the header with reply_hack() and print it;
 *          zero:    print every address nxtaddr() extracts from it
 */
static void parsebuf(char *longbuf, int reply)
{
    char *addr;

    if (reply)
    {
        /* the header is rewritten in place and then printed */
        reply_hack(longbuf, "HOSTNAME.NET");
        printf("Rewritten buffer: %s", longbuf);
        return;
    }

    /* the first call hands over the header, later calls pass NULL to continue */
    for (addr = nxtaddr(longbuf);
         addr != (char *)NULL;
         addr = nxtaddr((char *)NULL))
    {
        printf("\t-> \"%s\"\n", addr);
    }
}
365
366
367
368 main(int argc, char *argv[])
369 {
370     char        buf[MSGBUFSIZE], longbuf[BUFSIZ];
371     int         ch, reply;
372     
373     verbose = reply = FALSE;
374     while ((ch = getopt(argc, argv, "rv")) != EOF)
375         switch(ch)
376         {
377         case 'r':
378             reply = TRUE;
379             break;
380
381         case 'v':
382             verbose = TRUE;
383             break;
384         }
385
386     while (fgets(buf, sizeof(buf)-1, stdin))
387     {
388         if (buf[0] == ' ' || buf[0] == '\t')
389             strcat(longbuf, buf);
390         else if (!strncasecmp("From: ", buf, 6)
391                     || !strncasecmp("To: ", buf, 4)
392                     || !strncasecmp("Reply-", buf, 6)
393                     || !strncasecmp("Cc: ", buf, 4)
394                     || !strncasecmp("Bcc: ", buf, 5))
395             strcpy(longbuf, buf);       
396         else if (longbuf[0])
397         {
398             if (verbose)
399                 fputs(longbuf, stdout);
400             parsebuf(longbuf, reply);
401             longbuf[0] = '\0';
402         }
403     }
404     if (longbuf[0])
405     {
406         if (verbose)
407             fputs(longbuf, stdout);
408         parsebuf(longbuf, reply);
409     }
410 }
411 #endif /* TESTMAIN */
412
413 /* rfc822.c end */