]> Pileus Git - ~andy/fetchmail/blob - rfc822.c
_( -> GT_(
[~andy/fetchmail] / rfc822.c
1 /*
2  * rfc822.c -- code for slicing and dicing RFC822 mail headers
3  *
4  * Copyright 1997 by Eric S. Raymond
5  * For license terms, see the file COPYING in this directory.
6  */
7
8 #include  <stdio.h>
9 #include  <ctype.h>
10 #include  <string.h>
11 #if defined(STDC_HEADERS)
12 #include  <stdlib.h>
13 #endif
14
15 #include "config.h"
16 #include "fetchmail.h"
17 #include "i18n.h"
18
/*
 * True when p points at a newline that is NOT followed by a space or tab,
 * i.e. the newline is not the start of a folded continuation line.
 * Reads p[0] and, only when p[0] is a newline, p[1].
 */
#define HEADER_END(p)	(*(p) == '\n' && !((p)[1] == ' ' || (p)[1] == '\t'))

#ifdef TESTMAIN
/* when nonzero, the parsers below print a per-character state trace */
static int verbose;
/* NOTE(review): not referenced in this file; presumably needed by support
 * code linked into the standalone test build -- confirm */
char *program_name = "rfc822";
#endif /* TESTMAIN */
25
26 unsigned char *reply_hack(buf, host)
27 /* hack message headers so replies will work properly */
28 unsigned char *buf;             /* header to be hacked */
29 const unsigned char *host;      /* server hostname */
30 {
31     unsigned char *from, *cp, last_nws = '\0', *parens_from = NULL;
32     int parendepth, state, has_bare_name_part, has_host_part;
33 #ifndef TESTMAIN
34     int addresscount = 1;
35 #endif /* TESTMAIN */
36
37     if (strncasecmp("From:", buf, 5)
38         && strncasecmp("To:", buf, 3)
39         && strncasecmp("Reply-To:", buf, 9)
40         && strncasecmp("Return-Path:", buf, 12)
41         && strncasecmp("Cc:", buf, 3)
42         && strncasecmp("Bcc:", buf, 4)
43         && strncasecmp("Resent-From:", buf, 12)
44         && strncasecmp("Resent-To:", buf, 10)
45         && strncasecmp("Resent-Cc:", buf, 10)
46         && strncasecmp("Resent-Bcc:", buf, 11)
47         && strncasecmp("Apparently-From:", buf, 16)
48         && strncasecmp("Apparently-To:", buf, 14)
49         && strncasecmp("Sender:", buf, 7)
50         && strncasecmp("Resent-Sender:", buf, 14)
51        ) {
52         return(buf);
53     }
54
55 #ifndef TESTMAIN
56     if (outlevel >= O_DEBUG)
57         report_build(stdout, GT_("About to rewrite %s"), buf);
58
59     /* make room to hack the address; buf must be malloced */
60     for (cp = buf; *cp; cp++)
61         if (*cp == ',' || isspace(*cp))
62             addresscount++;
63     buf = (unsigned char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
64 #endif /* TESTMAIN */
65
66     /*
67      * This is going to foo up on some ill-formed addresses.
68      * Note that we don't rewrite the fake address <> in order to
69      * avoid screwing up bounce suppression with a null Return-Path.
70      */
71
72     parendepth = state = 0;
73     has_host_part = has_bare_name_part = FALSE;
74     for (from = buf; *from; from++)
75     {
76 #ifdef TESTMAIN
77         if (verbose)
78         {
79             printf("state %d: %s", state, buf);
80             printf("%*s^\n", from - buf + 10, " ");
81         }
82 #endif /* TESTMAIN */
83         if (state != 2)
84         {
85             if (*from == '(')
86                 ++parendepth;
87             else if (*from == ')')
88                 --parendepth;
89         }
90
91         if (!parendepth && !has_host_part)
92             switch (state)
93             {
94             case 0:     /* before header colon */
95                 if (*from == ':')
96                     state = 1;
97                 break;
98
99             case 1:     /* we've seen the colon, we're looking for addresses */
100                 if (!isspace(*from))
101                     last_nws = *from;
102                 if (*from == '<')
103                     state = 3;
104                 else if (*from == '@' || *from == '!')
105                     has_host_part = TRUE;
106                 else if (*from == '"')
107                     state = 2;
108                 /*
109                  * Not expanding on last non-WS == ';' deals with groupnames,
110                  * an obscure misfeature described in sections
111                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
112                  */
113                 else if ((*from == ',' || HEADER_END(from))
114                          && has_bare_name_part
115                          && !has_host_part
116                          && last_nws != ';')
117                 {
118                     int hostlen;
119                     unsigned char *p;
120
121                     p = from;
122                     if (parens_from)
123                         from = parens_from;
124                     while (isspace(*from) || (*from == ','))
125                         --from;
126                     from++;
127                     hostlen = strlen(host);
128                     for (cp = from + strlen(from); cp >= from; --cp)
129                         cp[hostlen+1] = *cp;
130                     *from++ = '@';
131                     memcpy(from, host, hostlen);
132                     from = p + hostlen + 1;
133                     has_host_part = TRUE;
134                 } 
135                 else if (from[1] == '('
136                          && has_bare_name_part
137                          && !has_host_part
138                          && last_nws != ';' && last_nws != ')')
139                 {
140                     parens_from = from;
141                 } 
142                 else if (!isspace(*from))
143                     has_bare_name_part = TRUE;
144                 break;
145
146             case 2:     /* we're in a string */
147                 if (*from == '"')
148                 {
149                     char        *bp;
150                     int         bscount;
151
152                     bscount = 0;
153                     for (bp = from - 1; *bp == '\\'; bp--)
154                         bscount++;
155                     if (!(bscount % 2))
156                         state = 1;
157                 }
158                 break;
159
160             case 3:     /* we're in a <>-enclosed address */
161                 if (*from == '@' || *from == '!')
162                     has_host_part = TRUE;
163                 else if (*from == '>' && from[-1] != '<')
164                 {
165                     state = 1;
166                     if (!has_host_part)
167                     {
168                         int hostlen;
169
170                         hostlen = strlen(host);
171                         for (cp = from + strlen(from); cp >= from; --cp)
172                             cp[hostlen+1] = *cp;
173                         *from++ = '@';
174                         memcpy(from, host, hostlen);
175                         from += hostlen;
176                         has_host_part = TRUE;
177                     }
178                 }
179                 break;
180             }
181
182         /*
183          * If we passed a comma, reset everything.
184          */
185         if (from[-1] == ',' && !parendepth) {
186           has_host_part = has_bare_name_part = FALSE;
187           parens_from = NULL;
188         }
189     }
190
191 #ifndef TESTMAIN
192     if (outlevel >= O_DEBUG)
193         report_complete(stdout, GT_("Rewritten version is %s\n"), buf);
194 #endif /* TESTMAIN */
195     return(buf);
196 }
197
198 unsigned char *nxtaddr(hdr)
199 /* parse addresses in succession out of a specified RFC822 header */
200 const unsigned char *hdr;       /* header to be parsed, NUL to continue previous hdr */
201 {
202     static unsigned char address[POPBUFSIZE+1];
203     static int tp;
204     static const unsigned char *hp;
205     static int  state, oldstate;
206 #ifdef TESTMAIN
207     static const unsigned char *orighdr;
208 #endif /* TESTMAIN */
209     int parendepth = 0;
210
211 #define START_HDR       0       /* before header colon */
212 #define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
213 #define BARE_ADDRESS    2       /* collecting address without delimiters */
214 #define INSIDE_DQUOTE   3       /* inside double quotes */
215 #define INSIDE_PARENS   4       /* inside parentheses */
216 #define INSIDE_BRACKETS 5       /* inside bracketed address */
217 #define ENDIT_ALL       6       /* after last address */
218
219 #define NEXTTP()        ((tp < sizeof(address)-1) ? tp++ : tp)
220
221     if (hdr)
222     {
223         hp = hdr;
224         state = START_HDR;
225 #ifdef TESTMAIN
226         orighdr = hdr;
227 #endif /* TESTMAIN */
228         tp = 0;
229     }
230
231     for (; *hp; hp++)
232     {
233 #ifdef TESTMAIN
234         if (verbose)
235         {
236             printf("state %d: %s", state, orighdr);
237             printf("%*s^\n", hp - orighdr + 10, " ");
238         }
239 #endif /* TESTMAIN */
240
241         if (state == ENDIT_ALL)         /* after last address */
242             return(NULL);
243         else if (HEADER_END(hp))
244         {
245             state = ENDIT_ALL;
246             if (tp)
247             {
248                 while (isspace(address[--tp]))
249                     continue;
250                 address[++tp] = '\0';
251                 tp = 0;
252                 return (address);
253             }
254             return((unsigned char *)NULL);
255         }
256         else if (*hp == '\\')           /* handle RFC822 escaping */
257         {
258             if (state != INSIDE_PARENS)
259             {
260                 address[NEXTTP()] = *hp++;      /* take the escape */
261                 address[NEXTTP()] = *hp;        /* take following unsigned char */
262             }
263         }
264         else switch (state)
265         {
266         case START_HDR:   /* before header colon */
267             if (*hp == ':')
268                 state = SKIP_JUNK;
269             break;
270
271         case SKIP_JUNK:         /* looking for address start */
272             if (*hp == '"')     /* quoted string */
273             {
274                 oldstate = SKIP_JUNK;
275                 state = INSIDE_DQUOTE;
276                 address[NEXTTP()] = *hp;
277             }
278             else if (*hp == '(')        /* address comment -- ignore */
279             {
280                 parendepth = 1;
281                 oldstate = SKIP_JUNK;
282                 state = INSIDE_PARENS;    
283             }
284             else if (*hp == '<')        /* begin <address> */
285             {
286                 state = INSIDE_BRACKETS;
287                 tp = 0;
288             }
289             else if (*hp != ',' && !isspace(*hp))
290             {
291                 --hp;
292                 state = BARE_ADDRESS;
293             }
294             break;
295
296         case BARE_ADDRESS:      /* collecting address without delimiters */
297             if (*hp == ',')     /* end of address */
298             {
299                 if (tp)
300                 {
301                     address[NEXTTP()] = '\0';
302                     state = SKIP_JUNK;
303                     tp = 0;
304                     return(address);
305                 }
306             }
307             else if (*hp == '(')        /* beginning of comment */
308             {
309                 parendepth = 1;
310                 oldstate = BARE_ADDRESS;
311                 state = INSIDE_PARENS;    
312             }
313             else if (*hp == '<')        /* beginning of real address */
314             {
315                 state = INSIDE_BRACKETS;
316                 tp = 0;
317             }
318             else if (*hp == '"')        /* quoted word, copy verbatim */
319             {
320                 oldstate = state;
321                 state = INSIDE_DQUOTE;
322                 address[NEXTTP()] = *hp;
323             }
324             else if (!isspace(*hp))     /* just take it, ignoring whitespace */
325                 address[NEXTTP()] = *hp;
326             break;
327
328         case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
329             if (*hp != '"')
330                 address[NEXTTP()] = *hp;
331             else
332             {
333                 address[NEXTTP()] = *hp;
334                 state = oldstate;
335             }
336             break;
337
338         case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
339             if (*hp == '(')
340                 ++parendepth;
341             else if (*hp == ')')
342                 --parendepth;
343             if (parendepth == 0)
344                 state = oldstate;
345             break;
346
347         case INSIDE_BRACKETS:   /* possible <>-enclosed address */
348             if (*hp == '>')     /* end of address */
349             {
350                 address[NEXTTP()] = '\0';
351                 state = SKIP_JUNK;
352                 ++hp;
353                 tp = 0;
354                 return(address);
355             }
356             else if (*hp == '<')        /* nested <> */
357                 tp = 0;
358             else if (*hp == '"')        /* quoted address */
359             {
360                 address[NEXTTP()] = *hp;
361                 oldstate = INSIDE_BRACKETS;
362                 state = INSIDE_DQUOTE;
363             }
364             else                        /* just copy address */
365                 address[NEXTTP()] = *hp;
366             break;
367         }
368     }
369
370     return(NULL);
371 }
372
373 #ifdef TESTMAIN
/*
 * Test-driver helper: run one collected header through one of the parsers.
 * With reply set, the header is passed through reply_hack() (with the
 * fixed host name "HOSTNAME.NET") and the buffer is printed; otherwise
 * every address nxtaddr() extracts from it is printed on its own line.
 */
static void parsebuf(unsigned char *longbuf, int reply)
{
    unsigned char *addr;

    if (!reply)
    {
        for (addr = nxtaddr(longbuf);
             addr != (unsigned char *)NULL;
             addr = nxtaddr((unsigned char *)NULL))
        {
            printf("\t-> \"%s\"\n", addr);
        }
        return;
    }

    reply_hack(longbuf, "HOSTNAME.NET");
    printf("Rewritten buffer: %s", longbuf);
}
390
391
392
393 main(int argc, char *argv[])
394 {
395     unsigned char       buf[MSGBUFSIZE], longbuf[BUFSIZ];
396     int                 ch, reply;
397     
398     verbose = reply = FALSE;
399     while ((ch = getopt(argc, argv, "rv")) != EOF)
400         switch(ch)
401         {
402         case 'r':
403             reply = TRUE;
404             break;
405
406         case 'v':
407             verbose = TRUE;
408             break;
409         }
410
411     while (fgets(buf, sizeof(buf)-1, stdin))
412     {
413         if (buf[0] == ' ' || buf[0] == '\t')
414             strcat(longbuf, buf);
415         else if (!strncasecmp("From: ", buf, 6)
416                     || !strncasecmp("To: ", buf, 4)
417                     || !strncasecmp("Reply-", buf, 6)
418                     || !strncasecmp("Cc: ", buf, 4)
419                     || !strncasecmp("Bcc: ", buf, 5))
420             strcpy(longbuf, buf);       
421         else if (longbuf[0])
422         {
423             if (verbose)
424                 fputs(longbuf, stdout);
425             parsebuf(longbuf, reply);
426             longbuf[0] = '\0';
427         }
428     }
429     if (longbuf[0])
430     {
431         if (verbose)
432             fputs(longbuf, stdout);
433         parsebuf(longbuf, reply);
434     }
435 }
436 #endif /* TESTMAIN */
437
438 /* rfc822.c end */