Pileus Git - ~andy/fetchmail/blob - rfc822.c
Bug fixes and internationalization improvements.
[~andy/fetchmail] / rfc822.c
1 /*
2  * rfc822.c -- code for slicing and dicing RFC822 mail headers
3  *
4  * Copyright 1997 by Eric S. Raymond
5  * For license terms, see the file COPYING in this directory.
6  */
7
8 #include  <stdio.h>
9 #include  <ctype.h>
10 #include  <string.h>
11 #if defined(STDC_HEADERS)
12 #include  <stdlib.h>
13 #endif
14
15 #include "config.h"
16 #include "fetchmail.h"
17 #include "i18n.h"
18
/*
 * True when p points at a newline that really terminates the header,
 * i.e. one that is NOT followed by a space or tab (which would make the
 * next line a folded continuation of the same header).
 */
#define HEADER_END(p)   (*(p) == '\n' && !((p)[1] == ' ' || (p)[1] == '\t'))

#if defined(TESTMAIN)
/* Globals that exist only in the standalone test build. */
static int verbose;                 /* nonzero: trace the parsers step by step */
char *program_name = "rfc822";      /* not used in this file; presumably read by code elsewhere */
#endif /* TESTMAIN */
25
26 unsigned char *reply_hack(buf, host)
27 /* hack message headers so replies will work properly */
28 unsigned char *buf;             /* header to be hacked */
29 const unsigned char *host;      /* server hostname */
30 {
31     unsigned char *from, *cp, last_nws = '\0', *parens_from = NULL;
32     int parendepth, state, has_bare_name_part, has_host_part;
33 #ifndef TESTMAIN
34     int addresscount = 1;
35 #endif /* TESTMAIN */
36
37     if (strncasecmp("From:", buf, 5)
38         && strncasecmp("To:", buf, 3)
39         && strncasecmp("Reply-To:", buf, 9)
40         && strncasecmp("Return-Path:", buf, 12)
41         && strncasecmp("Cc:", buf, 3)
42         && strncasecmp("Bcc:", buf, 4)
43         && strncasecmp("Resent-From:", buf, 12)
44         && strncasecmp("Resent-To:", buf, 10)
45         && strncasecmp("Resent-Cc:", buf, 10)
46         && strncasecmp("Resent-Bcc:", buf, 11)
47         && strncasecmp("Apparently-From:", buf, 16)
48         && strncasecmp("Apparently-To:", buf, 14)
49         && strncasecmp("Sender:", buf, 7)
50         && strncasecmp("Resent-Sender:", buf, 14)
51        ) {
52         return(buf);
53     }
54
55 #ifndef TESTMAIN
56     if (outlevel >= O_DEBUG)
57         report_build(stdout, _("About to rewrite %s"), buf);
58
59     /* make room to hack the address; buf must be malloced */
60     for (cp = buf; *cp; cp++)
61         if (*cp == ',' || isspace(*cp))
62             addresscount++;
63     buf = (unsigned char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
64 #endif /* TESTMAIN */
65
66     /*
67      * This is going to foo up on some ill-formed addresses.
68      * Note that we don't rewrite the fake address <> in order to
69      * avoid screwing up bounce suppression with a null Return-Path.
70      */
71
72     parendepth = state = 0;
73     has_host_part = has_bare_name_part = FALSE;
74     for (from = buf; *from; from++)
75     {
76 #ifdef TESTMAIN
77         if (verbose)
78         {
79             printf("state %d: %s", state, buf);
80             printf("%*s^\n", from - buf + 10, " ");
81         }
82 #endif /* TESTMAIN */
83         if (state != 2)
84         {
85             if (*from == '(')
86                 ++parendepth;
87             else if (*from == ')')
88                 --parendepth;
89         }
90
91         if (!parendepth && !has_host_part)
92             switch (state)
93             {
94             case 0:     /* before header colon */
95                 if (*from == ':')
96                     state = 1;
97                 break;
98
99             case 1:     /* we've seen the colon, we're looking for addresses */
100                 if (!isspace(*from))
101                     last_nws = *from;
102                 if (*from == '<')
103                     state = 3;
104                 else if (*from == '@')
105                     has_host_part = TRUE;
106                 else if (*from == '"')
107                     state = 2;
108                 /*
109                  * Not expanding on last non-WS == ';' deals with groupnames,
110                  * an obscure misfeature described in sections
111                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
112                  */
113                 else if ((*from == ',' || HEADER_END(from))
114                          && has_bare_name_part
115                          && !has_host_part
116                          && last_nws != ';')
117                 {
118                     int hostlen;
119                     unsigned char *p;
120
121                     p = from;
122                     if (parens_from)
123                         from = parens_from;
124                     while (isspace(*from) || (*from == ','))
125                         --from;
126                     from++;
127                     hostlen = strlen(host);
128                     for (cp = from + strlen(from); cp >= from; --cp)
129                         cp[hostlen+1] = *cp;
130                     *from++ = '@';
131                     memcpy(from, host, hostlen);
132                     from = p + hostlen + 1;
133                     has_host_part = TRUE;
134                 } 
135                 else if (from[1] == '('
136                          && has_bare_name_part
137                          && !has_host_part
138                          && last_nws != ';' && last_nws != ')')
139                 {
140                     parens_from = from;
141                 } 
142                 else if (!isspace(*from))
143                     has_bare_name_part = TRUE;
144                 break;
145
146             case 2:     /* we're in a string */
147                 if (*from == '"')
148                     state = 1;
149                 break;
150
151             case 3:     /* we're in a <>-enclosed address */
152                 if (*from == '@')
153                     has_host_part = TRUE;
154                 else if (*from == '>' && from[-1] != '<')
155                 {
156                     state = 1;
157                     if (!has_host_part)
158                     {
159                         int hostlen;
160
161                         hostlen = strlen(host);
162                         for (cp = from + strlen(from); cp >= from; --cp)
163                             cp[hostlen+1] = *cp;
164                         *from++ = '@';
165                         memcpy(from, host, hostlen);
166                         from += hostlen;
167                         has_host_part = TRUE;
168                     }
169                 }
170                 break;
171             }
172
173         /*
174          * If we passed a comma, reset everything.
175          */
176         if (from[-1] == ',' && !parendepth) {
177           has_host_part = has_bare_name_part = FALSE;
178           parens_from = NULL;
179         }
180     }
181
182 #ifndef TESTMAIN
183     if (outlevel >= O_DEBUG)
184         report_complete(stdout, _("Rewritten version is %s\n"), buf);
185 #endif /* TESTMAIN */
186     return(buf);
187 }
188
189 unsigned char *nxtaddr(hdr)
190 /* parse addresses in succession out of a specified RFC822 header */
191 const unsigned char *hdr;       /* header to be parsed, NUL to continue previous hdr */
192 {
193     static unsigned char *tp, address[POPBUFSIZE+1];
194     static const unsigned char *hp;
195     static int  state, oldstate;
196 #ifdef TESTMAIN
197     static const unsigned char *orighdr;
198 #endif /* TESTMAIN */
199     int parendepth = 0;
200
201 #define START_HDR       0       /* before header colon */
202 #define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
203 #define BARE_ADDRESS    2       /* collecting address without delimiters */
204 #define INSIDE_DQUOTE   3       /* inside double quotes */
205 #define INSIDE_PARENS   4       /* inside parentheses */
206 #define INSIDE_BRACKETS 5       /* inside bracketed address */
207 #define ENDIT_ALL       6       /* after last address */
208
209     if (hdr)
210     {
211         hp = hdr;
212         state = START_HDR;
213 #ifdef TESTMAIN
214         orighdr = hdr;
215 #endif /* TESTMAIN */
216         tp = address;
217     }
218
219     for (; *hp; hp++)
220     {
221 #ifdef TESTMAIN
222         if (verbose)
223         {
224             printf("state %d: %s", state, orighdr);
225             printf("%*s^\n", hp - orighdr + 10, " ");
226         }
227 #endif /* TESTMAIN */
228
229         if (state == ENDIT_ALL)         /* after last address */
230             return(NULL);
231         else if (HEADER_END(hp))
232         {
233             state = ENDIT_ALL;
234             if (tp > address)
235             {
236                 while (isspace(*--tp))
237                     continue;
238                 *++tp = '\0';
239             }
240             return(tp > address ? (tp = address) : (unsigned char *)NULL);
241         }
242         else if (*hp == '\\')           /* handle RFC822 escaping */
243         {
244             if (state != INSIDE_PARENS)
245             {
246                 *tp++ = *hp++;                  /* take the escape */
247                 *tp++ = *hp;                    /* take following unsigned char */
248             }
249         }
250         else switch (state)
251         {
252         case START_HDR:   /* before header colon */
253             if (*hp == ':')
254                 state = SKIP_JUNK;
255             break;
256
257         case SKIP_JUNK:         /* looking for address start */
258             if (*hp == '"')     /* quoted string */
259             {
260                 oldstate = SKIP_JUNK;
261                 state = INSIDE_DQUOTE;
262                 *tp++ = *hp;
263             }
264             else if (*hp == '(')        /* address comment -- ignore */
265             {
266                 parendepth = 1;
267                 oldstate = SKIP_JUNK;
268                 state = INSIDE_PARENS;    
269             }
270             else if (*hp == '<')        /* begin <address> */
271             {
272                 state = INSIDE_BRACKETS;
273                 tp = address;
274             }
275             else if (*hp != ',' && !isspace(*hp))
276             {
277                 --hp;
278                 state = BARE_ADDRESS;
279             }
280             break;
281
282         case BARE_ADDRESS:      /* collecting address without delimiters */
283             if (*hp == ',')     /* end of address */
284             {
285                 if (tp > address)
286                 {
287                     *tp++ = '\0';
288                     state = SKIP_JUNK;
289                     return(tp = address);
290                 }
291             }
292             else if (*hp == '(')        /* beginning of comment */
293             {
294                 parendepth = 1;
295                 oldstate = BARE_ADDRESS;
296                 state = INSIDE_PARENS;    
297             }
298             else if (*hp == '<')        /* beginning of real address */
299             {
300                 state = INSIDE_BRACKETS;
301                 tp = address;
302             }
303             else if (!isspace(*hp))     /* just take it, ignoring whitespace */
304                 *tp++ = *hp;
305             break;
306
307         case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
308             if (*hp != '"')
309                 *tp++ = *hp;
310             else
311             {
312                 *tp++ = *hp;
313                 state = oldstate;
314             }
315             break;
316
317         case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
318             if (*hp == '(')
319                 ++parendepth;
320             else if (*hp == ')')
321                 --parendepth;
322             if (parendepth == 0)
323                 state = oldstate;
324             break;
325
326         case INSIDE_BRACKETS:   /* possible <>-enclosed address */
327             if (*hp == '>')     /* end of address */
328             {
329                 *tp++ = '\0';
330                 state = SKIP_JUNK;
331                 ++hp;
332                 return(tp = address);
333             }
334             else if (*hp == '<')        /* nested <> */
335                 tp = address;
336             else if (*hp == '"')        /* quoted address */
337             {
338                 *tp++ = *hp;
339                 oldstate = INSIDE_BRACKETS;
340                 state = INSIDE_DQUOTE;
341             }
342             else                        /* just copy address */
343                 *tp++ = *hp;
344             break;
345         }
346     }
347
348     return(NULL);
349 }
350
351 #ifdef TESTMAIN
/*
 * Run one unfolded header through the code under test and print the
 * outcome: with `reply' set, the header as rewritten by reply_hack();
 * otherwise every address nxtaddr() extracts from it, one per line.
 */
static void parsebuf(unsigned char *longbuf, int reply)
{
    unsigned char       *addr;

    if (!reply)
    {
        /* first call hands over the header, later calls continue it */
        for (addr = nxtaddr(longbuf);
             addr != (unsigned char *)NULL;
             addr = nxtaddr((unsigned char *)NULL))
        {
            printf("\t-> \"%s\"\n", addr);
        }
        return;
    }

    reply_hack(longbuf, "HOSTNAME.NET");
    printf("Rewritten buffer: %s", longbuf);
}
368
369
370
371 main(int argc, char *argv[])
372 {
373     unsigned char       buf[MSGBUFSIZE], longbuf[BUFSIZ];
374     int                 ch, reply;
375     
376     verbose = reply = FALSE;
377     while ((ch = getopt(argc, argv, "rv")) != EOF)
378         switch(ch)
379         {
380         case 'r':
381             reply = TRUE;
382             break;
383
384         case 'v':
385             verbose = TRUE;
386             break;
387         }
388
389     while (fgets(buf, sizeof(buf)-1, stdin))
390     {
391         if (buf[0] == ' ' || buf[0] == '\t')
392             strcat(longbuf, buf);
393         else if (!strncasecmp("From: ", buf, 6)
394                     || !strncasecmp("To: ", buf, 4)
395                     || !strncasecmp("Reply-", buf, 6)
396                     || !strncasecmp("Cc: ", buf, 4)
397                     || !strncasecmp("Bcc: ", buf, 5))
398             strcpy(longbuf, buf);       
399         else if (longbuf[0])
400         {
401             if (verbose)
402                 fputs(longbuf, stdout);
403             parsebuf(longbuf, reply);
404             longbuf[0] = '\0';
405         }
406     }
407     if (longbuf[0])
408     {
409         if (verbose)
410             fputs(longbuf, stdout);
411         parsebuf(longbuf, reply);
412     }
413 }
414 #endif /* TESTMAIN */
415
416 /* rfc822.c end */