]> Pileus Git - ~andy/fetchmail/blob - rfc822.c
08805d93a843a5f3765cb9301e693d5495ee82df
[~andy/fetchmail] / rfc822.c
1 /*
2  * rfc822.c -- code for slicing and dicing RFC822 mail headers
3  *
4  * Copyright 1997 by Eric S. Raymond
5  * For license terms, see the file COPYING in this directory.
6  */
7
#include  <stdio.h>
#include  <ctype.h>
#include  <string.h>
#if defined(STDC_HEADERS)
#include  <stdlib.h>
#endif
#ifdef TESTMAIN
#include  <stdlib.h>    /* malloc(), free() for the test harness */
#endif
14
15 #include "fetchmail.h"
16
/*
 * True when p points at a newline whose following character is neither a
 * space nor a tab, i.e. a newline that does not introduce a continuation
 * line.  Note that p is evaluated more than once.
 */
#define HEADER_END(p)   ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))

#ifdef TESTMAIN
/* set by the -v option of the test main(); enables state tracing output */
static int verbose;
/* NOTE(review): not referenced in this file -- presumably needed by the
 * error-reporting code linked into the test program; confirm */
char *program_name = "rfc822";
#endif /* TESTMAIN */
23
24 char *reply_hack(buf, host)
25 /* hack message headers so replies will work properly */
26 char *buf;              /* header to be hacked */
27 const char *host;       /* server hostname */
28 {
29     char *from, *cp, last_nws = '\0', *parens_from = NULL;
30     int parendepth, state, has_bare_name_part, has_host_part;
31     int addresscount = 1;
32
33     if (strncasecmp("From: ", buf, 6)
34         && strncasecmp("To: ", buf, 4)
35         && strncasecmp("Reply-To: ", buf, 10)
36         && strncasecmp("Return-Path: ", buf, 13)
37         && strncasecmp("Cc: ", buf, 4)
38         && strncasecmp("Bcc: ", buf, 5)
39         && strncasecmp("Resent-From: ", buf, 13)
40         && strncasecmp("Resent-To: ", buf, 11)
41         && strncasecmp("Resent-Cc: ", buf, 11)
42         && strncasecmp("Resent-Bcc: ", buf, 12)
43         && strncasecmp("Apparently-From:", buf, 16)
44         && strncasecmp("Apparently-To:", buf, 14)
45         && strncasecmp("Sender:", buf, 7)
46         && strncasecmp("Resent-Sender:", buf, 14)
47        ) {
48         return(buf);
49     }
50
51 #ifndef TESTMAIN
52     if (outlevel >= O_DEBUG)
53         error_build(0, 0, "About to rewrite %s", buf);
54 #else
55     /* make room to hack the address; buf must be malloced */
56     for (cp = buf; *cp; cp++)
57         if (*cp == ',' || isspace(*cp))
58             addresscount++;
59     buf = (char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
60 #endif /* TESTMAIN */
61
62     /*
63      * This is going to foo up on some ill-formed addresses.
64      * Note that we don't rewrite the fake address <> in order to
65      * avoid screwing up bounce suppression with a null Return-Path.
66      */
67
68     parendepth = state = 0;
69     has_host_part = has_bare_name_part = FALSE;
70     for (from = buf; *from; from++)
71     {
72 #ifdef TESTMAIN
73         if (verbose)
74         {
75             printf("state %d: %s", state, buf);
76             printf("%*s^\n", from - buf + 10, " ");
77         }
78 #endif /* TESTMAIN */
79         if (state != 2)
80             if (*from == '(')
81                 ++parendepth;
82             else if (*from == ')')
83                 --parendepth;
84
85         if (!parendepth && !has_host_part)
86             switch (state)
87             {
88             case 0:     /* before header colon */
89                 if (*from == ':')
90                     state = 1;
91                 break;
92
93             case 1:     /* we've seen the colon, we're looking for addresses */
94                 if (!isspace(*from))
95                     last_nws = *from;
96                 if (*from == '<')
97                     state = 3;
98                 else if (*from == '@')
99                     has_host_part = TRUE;
100                 else if (*from == '"')
101                     state = 2;
102                 /*
103                  * Not expanding on last non-WS == ';' deals with groupnames,
104                  * an obscure misfeature described in sections
105                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
106                  */
107                 else if ((*from == ',' || HEADER_END(from))
108                          && has_bare_name_part
109                          && !has_host_part
110                          && last_nws != ';')
111                 {
112                     int hostlen;
113                     char *p;
114
115                     p = from;
116                     if (parens_from)
117                         from = parens_from;
118                     while (isspace(*from) || (*from == ','))
119                         --from;
120                     from++;
121                     hostlen = strlen(host);
122                     for (cp = from + strlen(from); cp >= from; --cp)
123                         cp[hostlen+1] = *cp;
124                     *from++ = '@';
125                     memcpy(from, host, hostlen);
126                     from = p + hostlen + 1;
127                     has_host_part = TRUE;
128                 } 
129                 else if (from[1] == '('
130                          && has_bare_name_part
131                          && !has_host_part
132                          && last_nws != ';' && last_nws != ')')
133                 {
134                     parens_from = from;
135                 } 
136                 else if (!isspace(*from))
137                     has_bare_name_part = TRUE;
138                 break;
139
140             case 2:     /* we're in a string */
141                 if (*from == '"')
142                     state = 1;
143                 break;
144
145             case 3:     /* we're in a <>-enclosed address */
146                 if (*from == '@')
147                     has_host_part = TRUE;
148                 else if (*from == '>' && from[-1] != '<')
149                 {
150                     state = 1;
151                     if (!has_host_part)
152                     {
153                         int hostlen;
154
155                         hostlen = strlen(host);
156                         for (cp = from + strlen(from); cp >= from; --cp)
157                             cp[hostlen+1] = *cp;
158                         *from++ = '@';
159                         memcpy(from, host, hostlen);
160                         from += hostlen;
161                         has_host_part = TRUE;
162                     }
163                 }
164                 break;
165             }
166
167         /*
168          * If we passed a comma, reset everything.
169          */
170         if (from[-1] == ',' && !parendepth) {
171           has_host_part = has_bare_name_part = FALSE;
172           parens_from = NULL;
173         }
174     }
175
176 #ifndef TESTMAIN
177     if (outlevel >= O_DEBUG)
178         error_complete(0, 0, "Rewritten version is %s", buf);
179 #endif /* TESTMAIN */
180     return(buf);
181 }
182
183 char *nxtaddr(hdr)
184 /* parse addresses in succession out of a specified RFC822 header */
185 const char *hdr;        /* header to be parsed, NUL to continue previous hdr */
186 {
187     static char *tp, address[POPBUFSIZE+1];
188     static const char *hp;
189     static int  state, oldstate;
190 #ifdef TESTMAIN
191     static const char *orighdr;
192 #endif /* TESTMAIN */
193     int parendepth = 0;
194
195 #define START_HDR       0       /* before header colon */
196 #define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
197 #define BARE_ADDRESS    2       /* collecting address without delimiters */
198 #define INSIDE_DQUOTE   3       /* inside double quotes */
199 #define INSIDE_PARENS   4       /* inside parentheses */
200 #define INSIDE_BRACKETS 5       /* inside bracketed address */
201 #define ENDIT_ALL       6       /* after last address */
202
203     if (hdr)
204     {
205         hp = hdr;
206         state = START_HDR;
207 #ifdef TESTMAIN
208         orighdr = hdr;
209 #endif /* TESTMAIN */
210         tp = address;
211     }
212
213     for (; *hp; hp++)
214     {
215 #ifdef TESTMAIN
216         if (verbose)
217         {
218             printf("state %d: %s", state, orighdr);
219             printf("%*s^\n", hp - orighdr + 10, " ");
220         }
221 #endif /* TESTMAIN */
222
223         if (state == ENDIT_ALL)         /* after last address */
224             return(NULL);
225         else if (HEADER_END(hp))
226         {
227             state = ENDIT_ALL;
228             if (tp > address)
229             {
230                 while (isspace(*--tp))
231                     continue;
232                 *++tp = '\0';
233             }
234             return(tp > address ? (tp = address) : (char *)NULL);
235         }
236         else if (*hp == '\\')           /* handle RFC822 escaping */
237         {
238             if (state != INSIDE_PARENS)
239             {
240                 *tp++ = *hp++;                  /* take the escape */
241                 *tp++ = *hp;                    /* take following char */
242             }
243         }
244         else switch (state)
245         {
246         case START_HDR:   /* before header colon */
247             if (*hp == ':')
248                 state = SKIP_JUNK;
249             break;
250
251         case SKIP_JUNK:         /* looking for address start */
252             if (*hp == '"')     /* quoted string */
253             {
254                 oldstate = SKIP_JUNK;
255                 state = INSIDE_DQUOTE;
256                 *tp++ = *hp;
257             }
258             else if (*hp == '(')        /* address comment -- ignore */
259             {
260                 parendepth = 1;
261                 oldstate = SKIP_JUNK;
262                 state = INSIDE_PARENS;    
263             }
264             else if (*hp == '<')        /* begin <address> */
265             {
266                 state = INSIDE_BRACKETS;
267                 tp = address;
268             }
269             else if (*hp != ',' && !isspace(*hp))
270             {
271                 --hp;
272                 state = BARE_ADDRESS;
273             }
274             break;
275
276         case BARE_ADDRESS:      /* collecting address without delimiters */
277             if (*hp == ',')     /* end of address */
278             {
279                 if (tp > address)
280                 {
281                     *tp++ = '\0';
282                     state = SKIP_JUNK;
283                     return(tp = address);
284                 }
285             }
286             else if (*hp == '(')        /* beginning of comment */
287             {
288                 parendepth = 1;
289                 oldstate = BARE_ADDRESS;
290                 state = INSIDE_PARENS;    
291             }
292             else if (*hp == '<')        /* beginning of real address */
293             {
294                 state = INSIDE_BRACKETS;
295                 tp = address;
296             }
297             else if (!isspace(*hp))     /* just take it, ignoring whitespace */
298                 *tp++ = *hp;
299             break;
300
301         case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
302             if (*hp != '"')
303                 *tp++ = *hp;
304             else
305             {
306                 *tp++ = *hp;
307                 state = oldstate;
308             }
309             break;
310
311         case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
312             if (*hp == '(')
313                 ++parendepth;
314             else if (*hp == ')')
315                 --parendepth;
316             if (parendepth == 0)
317                 state = oldstate;
318             break;
319
320         case INSIDE_BRACKETS:   /* possible <>-enclosed address */
321             if (*hp == '>')     /* end of address */
322             {
323                 *tp++ = '\0';
324                 state = SKIP_JUNK;
325                 ++hp;
326                 return(tp = address);
327             }
328             else if (*hp == '<')        /* nested <> */
329                 tp = address;
330             else if (*hp == '"')        /* quoted address */
331             {
332                 *tp++ = *hp;
333                 oldstate = INSIDE_BRACKETS;
334                 state = INSIDE_DQUOTE;
335             }
336             else                        /* just copy address */
337                 *tp++ = *hp;
338             break;
339         }
340     }
341
342     return(NULL);
343 }
344
345 #ifdef TESTMAIN
/*
 * parsebuf -- run one accumulated header through the code under test
 *
 * longbuf: complete header, continuation lines included
 * reply:   nonzero to exercise reply_hack(), zero to exercise nxtaddr()
 *
 * Prints either the rewritten header or each parsed address on stdout.
 */
static void parsebuf(char *longbuf, int reply)
{
    char        *cp;

    if (reply)
    {
        /*
         * Under TESTMAIN reply_hack() reallocates its argument, so it
         * must be handed malloced storage, and the buffer it returns
         * (which may have moved) is the one to print and free.
         */
        size_t  len = strlen(longbuf) + 1;
        char    *copy = (char *)malloc(len);

        if (copy == (char *)NULL)
        {
            fputs("rfc822: out of memory\n", stderr);
            return;
        }
        memcpy(copy, longbuf, len);
        copy = reply_hack(copy, "HOSTNAME.NET");
        printf("Rewritten buffer: %s", copy);
        free(copy);
    }
    else
    {
        /* the first call passes the header, later calls pass NULL */
        for (cp = nxtaddr(longbuf);
             cp != (char *)NULL;
             cp = nxtaddr((char *)NULL))
            printf("\t-> \"%s\"\n", cp);
    }
}
362
363
364
365 main(int argc, char *argv[])
366 {
367     char        buf[MSGBUFSIZE], longbuf[BUFSIZ];
368     int         ch, reply;
369     
370     verbose = reply = FALSE;
371     while ((ch = getopt(argc, argv, "rv")) != EOF)
372         switch(ch)
373         {
374         case 'r':
375             reply = TRUE;
376             break;
377
378         case 'v':
379             verbose = TRUE;
380             break;
381         }
382
383     while (fgets(buf, sizeof(buf)-1, stdin))
384     {
385         if (buf[0] == ' ' || buf[0] == '\t')
386             strcat(longbuf, buf);
387         else if (!strncasecmp("From: ", buf, 6)
388                     || !strncasecmp("To: ", buf, 4)
389                     || !strncasecmp("Reply-", buf, 6)
390                     || !strncasecmp("Cc: ", buf, 4)
391                     || !strncasecmp("Bcc: ", buf, 5))
392             strcpy(longbuf, buf);       
393         else if (longbuf[0])
394         {
395             if (verbose)
396                 fputs(longbuf, stdout);
397             parsebuf(longbuf, reply);
398             longbuf[0] = '\0';
399         }
400     }
401     if (longbuf[0])
402     {
403         if (verbose)
404             fputs(longbuf, stdout);
405         parsebuf(longbuf, reply);
406     }
407 }
408 #endif /* TESTMAIN */
409
410 /* rfc822.c end */