]> Pileus Git - ~andy/fetchmail/blob - rfc822.c
de6008bf3427bf7de4dee234b3e70891d979730b
[~andy/fetchmail] / rfc822.c
1 /*
2  * rfc822.c -- code for slicing and dicing RFC822 mail headers
3  *
4  * Copyright 1997 by Eric S. Raymond
5  * For license terms, see the file COPYING in this directory.
6  */
7
8 #include  <stdio.h>
9 #include  <ctype.h>
10 #include  <string.h>
11 #if defined(STDC_HEADERS)
12 #include  <stdlib.h>
13 #endif
14
15 #include "config.h"
16 #include "fetchmail.h"
17
/*
 * HEADER_END(p) is true when p points at a newline that is NOT followed
 * by a space or tab, i.e. the newline is not the start of a folded
 * continuation line.  Note that p is evaluated more than once.
 */
#define HEADER_END(p)   ((p)[0] == '\n' && !((p)[1] == ' ' || (p)[1] == '\t'))

#ifdef TESTMAIN
/* set by the -v option of the test driver; enables state tracing */
static int verbose;
/* program name made available to the rest of the build */
char *program_name = "rfc822";
#endif /* TESTMAIN */
24
25 unsigned char *reply_hack(buf, host)
26 /* hack message headers so replies will work properly */
27 unsigned char *buf;             /* header to be hacked */
28 const unsigned char *host;      /* server hostname */
29 {
30     unsigned char *from, *cp, last_nws = '\0', *parens_from = NULL;
31     int parendepth, state, has_bare_name_part, has_host_part;
32 #ifndef TESTMAIN
33     int addresscount = 1;
34 #endif /* TESTMAIN */
35
36     if (strncasecmp("From:", buf, 5)
37         && strncasecmp("To:", buf, 3)
38         && strncasecmp("Reply-To:", buf, 9)
39         && strncasecmp("Return-Path:", buf, 12)
40         && strncasecmp("Cc:", buf, 3)
41         && strncasecmp("Bcc:", buf, 4)
42         && strncasecmp("Resent-From:", buf, 12)
43         && strncasecmp("Resent-To:", buf, 10)
44         && strncasecmp("Resent-Cc:", buf, 10)
45         && strncasecmp("Resent-Bcc:", buf, 11)
46         && strncasecmp("Apparently-From:", buf, 16)
47         && strncasecmp("Apparently-To:", buf, 14)
48         && strncasecmp("Sender:", buf, 7)
49         && strncasecmp("Resent-Sender:", buf, 14)
50        ) {
51         return(buf);
52     }
53
54 #ifndef TESTMAIN
55     if (outlevel >= O_DEBUG)
56         report_build(stdout, "About to rewrite %s", buf);
57
58     /* make room to hack the address; buf must be malloced */
59     for (cp = buf; *cp; cp++)
60         if (*cp == ',' || isspace(*cp))
61             addresscount++;
62     buf = (unsigned char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
63 #endif /* TESTMAIN */
64
65     /*
66      * This is going to foo up on some ill-formed addresses.
67      * Note that we don't rewrite the fake address <> in order to
68      * avoid screwing up bounce suppression with a null Return-Path.
69      */
70
71     parendepth = state = 0;
72     has_host_part = has_bare_name_part = FALSE;
73     for (from = buf; *from; from++)
74     {
75 #ifdef TESTMAIN
76         if (verbose)
77         {
78             printf("state %d: %s", state, buf);
79             printf("%*s^\n", from - buf + 10, " ");
80         }
81 #endif /* TESTMAIN */
82         if (state != 2)
83         {
84             if (*from == '(')
85                 ++parendepth;
86             else if (*from == ')')
87                 --parendepth;
88         }
89
90         if (!parendepth && !has_host_part)
91             switch (state)
92             {
93             case 0:     /* before header colon */
94                 if (*from == ':')
95                     state = 1;
96                 break;
97
98             case 1:     /* we've seen the colon, we're looking for addresses */
99                 if (!isspace(*from))
100                     last_nws = *from;
101                 if (*from == '<')
102                     state = 3;
103                 else if (*from == '@')
104                     has_host_part = TRUE;
105                 else if (*from == '"')
106                     state = 2;
107                 /*
108                  * Not expanding on last non-WS == ';' deals with groupnames,
109                  * an obscure misfeature described in sections
110                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
111                  */
112                 else if ((*from == ',' || HEADER_END(from))
113                          && has_bare_name_part
114                          && !has_host_part
115                          && last_nws != ';')
116                 {
117                     int hostlen;
118                     unsigned char *p;
119
120                     p = from;
121                     if (parens_from)
122                         from = parens_from;
123                     while (isspace(*from) || (*from == ','))
124                         --from;
125                     from++;
126                     hostlen = strlen(host);
127                     for (cp = from + strlen(from); cp >= from; --cp)
128                         cp[hostlen+1] = *cp;
129                     *from++ = '@';
130                     memcpy(from, host, hostlen);
131                     from = p + hostlen + 1;
132                     has_host_part = TRUE;
133                 } 
134                 else if (from[1] == '('
135                          && has_bare_name_part
136                          && !has_host_part
137                          && last_nws != ';' && last_nws != ')')
138                 {
139                     parens_from = from;
140                 } 
141                 else if (!isspace(*from))
142                     has_bare_name_part = TRUE;
143                 break;
144
145             case 2:     /* we're in a string */
146                 if (*from == '"')
147                     state = 1;
148                 break;
149
150             case 3:     /* we're in a <>-enclosed address */
151                 if (*from == '@')
152                     has_host_part = TRUE;
153                 else if (*from == '>' && from[-1] != '<')
154                 {
155                     state = 1;
156                     if (!has_host_part)
157                     {
158                         int hostlen;
159
160                         hostlen = strlen(host);
161                         for (cp = from + strlen(from); cp >= from; --cp)
162                             cp[hostlen+1] = *cp;
163                         *from++ = '@';
164                         memcpy(from, host, hostlen);
165                         from += hostlen;
166                         has_host_part = TRUE;
167                     }
168                 }
169                 break;
170             }
171
172         /*
173          * If we passed a comma, reset everything.
174          */
175         if (from[-1] == ',' && !parendepth) {
176           has_host_part = has_bare_name_part = FALSE;
177           parens_from = NULL;
178         }
179     }
180
181 #ifndef TESTMAIN
182     if (outlevel >= O_DEBUG)
183         report_complete(stdout, "Rewritten version is %s\n", buf);
184 #endif /* TESTMAIN */
185     return(buf);
186 }
187
188 unsigned char *nxtaddr(hdr)
189 /* parse addresses in succession out of a specified RFC822 header */
190 const unsigned char *hdr;       /* header to be parsed, NUL to continue previous hdr */
191 {
192     static unsigned char *tp, address[POPBUFSIZE+1];
193     static const unsigned char *hp;
194     static int  state, oldstate;
195 #ifdef TESTMAIN
196     static const unsigned char *orighdr;
197 #endif /* TESTMAIN */
198     int parendepth = 0;
199
200 #define START_HDR       0       /* before header colon */
201 #define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
202 #define BARE_ADDRESS    2       /* collecting address without delimiters */
203 #define INSIDE_DQUOTE   3       /* inside double quotes */
204 #define INSIDE_PARENS   4       /* inside parentheses */
205 #define INSIDE_BRACKETS 5       /* inside bracketed address */
206 #define ENDIT_ALL       6       /* after last address */
207
208     if (hdr)
209     {
210         hp = hdr;
211         state = START_HDR;
212 #ifdef TESTMAIN
213         orighdr = hdr;
214 #endif /* TESTMAIN */
215         tp = address;
216     }
217
218     for (; *hp; hp++)
219     {
220 #ifdef TESTMAIN
221         if (verbose)
222         {
223             printf("state %d: %s", state, orighdr);
224             printf("%*s^\n", hp - orighdr + 10, " ");
225         }
226 #endif /* TESTMAIN */
227
228         if (state == ENDIT_ALL)         /* after last address */
229             return(NULL);
230         else if (HEADER_END(hp))
231         {
232             state = ENDIT_ALL;
233             if (tp > address)
234             {
235                 while (isspace(*--tp))
236                     continue;
237                 *++tp = '\0';
238             }
239             return(tp > address ? (tp = address) : (unsigned char *)NULL);
240         }
241         else if (*hp == '\\')           /* handle RFC822 escaping */
242         {
243             if (state != INSIDE_PARENS)
244             {
245                 *tp++ = *hp++;                  /* take the escape */
246                 *tp++ = *hp;                    /* take following unsigned char */
247             }
248         }
249         else switch (state)
250         {
251         case START_HDR:   /* before header colon */
252             if (*hp == ':')
253                 state = SKIP_JUNK;
254             break;
255
256         case SKIP_JUNK:         /* looking for address start */
257             if (*hp == '"')     /* quoted string */
258             {
259                 oldstate = SKIP_JUNK;
260                 state = INSIDE_DQUOTE;
261                 *tp++ = *hp;
262             }
263             else if (*hp == '(')        /* address comment -- ignore */
264             {
265                 parendepth = 1;
266                 oldstate = SKIP_JUNK;
267                 state = INSIDE_PARENS;    
268             }
269             else if (*hp == '<')        /* begin <address> */
270             {
271                 state = INSIDE_BRACKETS;
272                 tp = address;
273             }
274             else if (*hp != ',' && !isspace(*hp))
275             {
276                 --hp;
277                 state = BARE_ADDRESS;
278             }
279             break;
280
281         case BARE_ADDRESS:      /* collecting address without delimiters */
282             if (*hp == ',')     /* end of address */
283             {
284                 if (tp > address)
285                 {
286                     *tp++ = '\0';
287                     state = SKIP_JUNK;
288                     return(tp = address);
289                 }
290             }
291             else if (*hp == '(')        /* beginning of comment */
292             {
293                 parendepth = 1;
294                 oldstate = BARE_ADDRESS;
295                 state = INSIDE_PARENS;    
296             }
297             else if (*hp == '<')        /* beginning of real address */
298             {
299                 state = INSIDE_BRACKETS;
300                 tp = address;
301             }
302             else if (!isspace(*hp))     /* just take it, ignoring whitespace */
303                 *tp++ = *hp;
304             break;
305
306         case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
307             if (*hp != '"')
308                 *tp++ = *hp;
309             else
310             {
311                 *tp++ = *hp;
312                 state = oldstate;
313             }
314             break;
315
316         case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
317             if (*hp == '(')
318                 ++parendepth;
319             else if (*hp == ')')
320                 --parendepth;
321             if (parendepth == 0)
322                 state = oldstate;
323             break;
324
325         case INSIDE_BRACKETS:   /* possible <>-enclosed address */
326             if (*hp == '>')     /* end of address */
327             {
328                 *tp++ = '\0';
329                 state = SKIP_JUNK;
330                 ++hp;
331                 return(tp = address);
332             }
333             else if (*hp == '<')        /* nested <> */
334                 tp = address;
335             else if (*hp == '"')        /* quoted address */
336             {
337                 *tp++ = *hp;
338                 oldstate = INSIDE_BRACKETS;
339                 state = INSIDE_DQUOTE;
340             }
341             else                        /* just copy address */
342                 *tp++ = *hp;
343             break;
344         }
345     }
346
347     return(NULL);
348 }
349
350 #ifdef TESTMAIN
/*
 * parsebuf -- run one collected header through the code under test
 *
 * With `reply' set, the header is passed to reply_hack() and the buffer
 * is printed afterwards; otherwise every address nxtaddr() extracts from
 * the header is printed on its own line.
 */
static void parsebuf(unsigned char *longbuf, int reply)
{
    unsigned char       *addr;

    if (reply)
    {
        reply_hack(longbuf, "HOSTNAME.NET");
        printf("Rewritten buffer: %s", longbuf);
        return;
    }

    /* first call hands over the header, later calls continue with NULL */
    for (addr = nxtaddr(longbuf);
         addr != (unsigned char *)NULL;
         addr = nxtaddr((unsigned char *)NULL))
    {
        printf("\t-> \"%s\"\n", addr);
    }
}
367
368
369
370 main(int argc, char *argv[])
371 {
372     unsigned char       buf[MSGBUFSIZE], longbuf[BUFSIZ];
373     int                 ch, reply;
374     
375     verbose = reply = FALSE;
376     while ((ch = getopt(argc, argv, "rv")) != EOF)
377         switch(ch)
378         {
379         case 'r':
380             reply = TRUE;
381             break;
382
383         case 'v':
384             verbose = TRUE;
385             break;
386         }
387
388     while (fgets(buf, sizeof(buf)-1, stdin))
389     {
390         if (buf[0] == ' ' || buf[0] == '\t')
391             strcat(longbuf, buf);
392         else if (!strncasecmp("From: ", buf, 6)
393                     || !strncasecmp("To: ", buf, 4)
394                     || !strncasecmp("Reply-", buf, 6)
395                     || !strncasecmp("Cc: ", buf, 4)
396                     || !strncasecmp("Bcc: ", buf, 5))
397             strcpy(longbuf, buf);       
398         else if (longbuf[0])
399         {
400             if (verbose)
401                 fputs(longbuf, stdout);
402             parsebuf(longbuf, reply);
403             longbuf[0] = '\0';
404         }
405     }
406     if (longbuf[0])
407     {
408         if (verbose)
409             fputs(longbuf, stdout);
410         parsebuf(longbuf, reply);
411     }
412 }
413 #endif /* TESTMAIN */
414
415 /* rfc822.c end */