Pileus Git - ~andy/fetchmail/blob - rfc822.c
Remove fetchmail dependencies.
[~andy/fetchmail] / rfc822.c
1 /*
2  * rfc822.c -- code for slicing and dicing RFC822 mail headers
3  *
4  * Copyright 1997 by Eric S. Raymond
5  * For license terms, see the file COPYING in this directory.
6  */
7
8 #include  <stdio.h>
9 #include  <ctype.h>
10 #include  <string.h>
11 #include  <stdlib.h>
12
13 #include "fetchmail.h"
14 #include "i18n.h"
15
/* Boolean constants, supplied only when nothing earlier defined TRUE. */
#if !defined(TRUE)
#define TRUE 1
#define FALSE 0
#endif

/*
 * Nonzero when p points at a newline whose following character is
 * neither a space nor a tab, i.e. the newline is not followed by a
 * folded continuation line.  Note that p is evaluated more than once.
 */
#define HEADER_END(p)   ((p)[0] == '\n' && !((p)[1] == ' ' || (p)[1] == '\t'))

#if defined(MAIN)
/* State used only by the stand-alone test driver at the end of this file. */
static int verbose;
char *program_name = "rfc822";
#endif /* MAIN */
27
28 unsigned char *reply_hack(buf, host)
29 /* hack message headers so replies will work properly */
30 unsigned char *buf;             /* header to be hacked */
31 const unsigned char *host;      /* server hostname */
32 {
33     unsigned char *from, *cp, last_nws = '\0', *parens_from = NULL;
34     int parendepth, state, has_bare_name_part, has_host_part;
35 #ifndef MAIN
36     int addresscount = 1;
37 #endif /* MAIN */
38
39     if (strncasecmp("From:", buf, 5)
40         && strncasecmp("To:", buf, 3)
41         && strncasecmp("Reply-To:", buf, 9)
42         && strncasecmp("Return-Path:", buf, 12)
43         && strncasecmp("Cc:", buf, 3)
44         && strncasecmp("Bcc:", buf, 4)
45         && strncasecmp("Resent-From:", buf, 12)
46         && strncasecmp("Resent-To:", buf, 10)
47         && strncasecmp("Resent-Cc:", buf, 10)
48         && strncasecmp("Resent-Bcc:", buf, 11)
49         && strncasecmp("Apparently-From:", buf, 16)
50         && strncasecmp("Apparently-To:", buf, 14)
51         && strncasecmp("Sender:", buf, 7)
52         && strncasecmp("Resent-Sender:", buf, 14)
53        ) {
54         return(buf);
55     }
56
57 #ifndef MAIN
58     if (outlevel >= O_DEBUG)
59         report_build(stdout, GT_("About to rewrite %s"), buf);
60
61     /* make room to hack the address; buf must be malloced */
62     for (cp = buf; *cp; cp++)
63         if (*cp == ',' || isspace(*cp))
64             addresscount++;
65     buf = (unsigned char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
66 #endif /* MAIN */
67
68     /*
69      * This is going to foo up on some ill-formed addresses.
70      * Note that we don't rewrite the fake address <> in order to
71      * avoid screwing up bounce suppression with a null Return-Path.
72      */
73
74     parendepth = state = 0;
75     has_host_part = has_bare_name_part = FALSE;
76     for (from = buf; *from; from++)
77     {
78 #ifdef MAIN
79         if (verbose)
80         {
81             printf("state %d: %s", state, buf);
82             printf("%*s^\n", from - buf + 10, " ");
83         }
84 #endif /* MAIN */
85         if (state != 2)
86         {
87             if (*from == '(')
88                 ++parendepth;
89             else if (*from == ')')
90                 --parendepth;
91         }
92
93         if (!parendepth && !has_host_part)
94             switch (state)
95             {
96             case 0:     /* before header colon */
97                 if (*from == ':')
98                     state = 1;
99                 break;
100
101             case 1:     /* we've seen the colon, we're looking for addresses */
102                 if (!isspace(*from))
103                     last_nws = *from;
104                 if (*from == '<')
105                     state = 3;
106                 else if (*from == '@' || *from == '!')
107                     has_host_part = TRUE;
108                 else if (*from == '"')
109                     state = 2;
110                 /*
111                  * Not expanding on last non-WS == ';' deals with groupnames,
112                  * an obscure misfeature described in sections
113                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
114                  */
115                 else if ((*from == ',' || HEADER_END(from))
116                          && has_bare_name_part
117                          && !has_host_part
118                          && last_nws != ';')
119                 {
120                     int hostlen;
121                     unsigned char *p;
122
123                     p = from;
124                     if (parens_from)
125                         from = parens_from;
126                     while (isspace(*from) || (*from == ','))
127                         --from;
128                     from++;
129                     hostlen = strlen(host);
130                     for (cp = from + strlen(from); cp >= from; --cp)
131                         cp[hostlen+1] = *cp;
132                     *from++ = '@';
133                     memcpy(from, host, hostlen);
134                     from = p + hostlen + 1;
135                     has_host_part = TRUE;
136                 } 
137                 else if (from[1] == '('
138                          && has_bare_name_part
139                          && !has_host_part
140                          && last_nws != ';' && last_nws != ')')
141                 {
142                     parens_from = from;
143                 } 
144                 else if (!isspace(*from))
145                     has_bare_name_part = TRUE;
146                 break;
147
148             case 2:     /* we're in a string */
149                 if (*from == '"')
150                 {
151                     char        *bp;
152                     int         bscount;
153
154                     bscount = 0;
155                     for (bp = from - 1; *bp == '\\'; bp--)
156                         bscount++;
157                     if (!(bscount % 2))
158                         state = 1;
159                 }
160                 break;
161
162             case 3:     /* we're in a <>-enclosed address */
163                 if (*from == '@' || *from == '!')
164                     has_host_part = TRUE;
165                 else if (*from == '>' && from[-1] != '<')
166                 {
167                     state = 1;
168                     if (!has_host_part)
169                     {
170                         int hostlen;
171
172                         hostlen = strlen(host);
173                         for (cp = from + strlen(from); cp >= from; --cp)
174                             cp[hostlen+1] = *cp;
175                         *from++ = '@';
176                         memcpy(from, host, hostlen);
177                         from += hostlen;
178                         has_host_part = TRUE;
179                     }
180                 }
181                 break;
182             }
183
184         /*
185          * If we passed a comma, reset everything.
186          */
187         if (from[-1] == ',' && !parendepth) {
188           has_host_part = has_bare_name_part = FALSE;
189           parens_from = NULL;
190         }
191     }
192
193 #ifndef MAIN
194     if (outlevel >= O_DEBUG)
195         report_complete(stdout, GT_("Rewritten version is %s\n"), buf);
196 #endif /* MAIN */
197     return(buf);
198 }
199
200 unsigned char *nxtaddr(hdr)
201 /* parse addresses in succession out of a specified RFC822 header */
202 const unsigned char *hdr;       /* header to be parsed, NUL to continue previous hdr */
203 {
204     static unsigned char address[POPBUFSIZE+1];
205     static int tp;
206     static const unsigned char *hp;
207     static int  state, oldstate;
208 #ifdef MAIN
209     static const unsigned char *orighdr;
210 #endif /* MAIN */
211     int parendepth = 0;
212
213 #define START_HDR       0       /* before header colon */
214 #define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
215 #define BARE_ADDRESS    2       /* collecting address without delimiters */
216 #define INSIDE_DQUOTE   3       /* inside double quotes */
217 #define INSIDE_PARENS   4       /* inside parentheses */
218 #define INSIDE_BRACKETS 5       /* inside bracketed address */
219 #define ENDIT_ALL       6       /* after last address */
220
221 #define NEXTTP()        ((tp < sizeof(address)-1) ? tp++ : tp)
222
223     if (hdr)
224     {
225         hp = hdr;
226         state = START_HDR;
227 #ifdef MAIN
228         orighdr = hdr;
229 #endif /* MAIN */
230         tp = 0;
231     }
232
233     for (; *hp; hp++)
234     {
235 #ifdef MAIN
236         if (verbose)
237         {
238             printf("state %d: %s", state, orighdr);
239             printf("%*s^\n", hp - orighdr + 10, " ");
240         }
241 #endif /* MAIN */
242
243         if (state == ENDIT_ALL)         /* after last address */
244             return(NULL);
245         else if (HEADER_END(hp))
246         {
247             state = ENDIT_ALL;
248             if (tp)
249             {
250                 while (isspace(address[--tp]))
251                     continue;
252                 address[++tp] = '\0';
253                 tp = 0;
254                 return (address);
255             }
256             return((unsigned char *)NULL);
257         }
258         else if (*hp == '\\')           /* handle RFC822 escaping */
259         {
260             if (state != INSIDE_PARENS)
261             {
262                 address[NEXTTP()] = *hp++;      /* take the escape */
263                 address[NEXTTP()] = *hp;        /* take following unsigned char */
264             }
265         }
266         else switch (state)
267         {
268         case START_HDR:   /* before header colon */
269             if (*hp == ':')
270                 state = SKIP_JUNK;
271             break;
272
273         case SKIP_JUNK:         /* looking for address start */
274             if (*hp == '"')     /* quoted string */
275             {
276                 oldstate = SKIP_JUNK;
277                 state = INSIDE_DQUOTE;
278                 address[NEXTTP()] = *hp;
279             }
280             else if (*hp == '(')        /* address comment -- ignore */
281             {
282                 parendepth = 1;
283                 oldstate = SKIP_JUNK;
284                 state = INSIDE_PARENS;    
285             }
286             else if (*hp == '<')        /* begin <address> */
287             {
288                 state = INSIDE_BRACKETS;
289                 tp = 0;
290             }
291             else if (*hp != ',' && !isspace(*hp))
292             {
293                 --hp;
294                 state = BARE_ADDRESS;
295             }
296             break;
297
298         case BARE_ADDRESS:      /* collecting address without delimiters */
299             if (*hp == ',')     /* end of address */
300             {
301                 if (tp)
302                 {
303                     address[NEXTTP()] = '\0';
304                     state = SKIP_JUNK;
305                     tp = 0;
306                     return(address);
307                 }
308             }
309             else if (*hp == '(')        /* beginning of comment */
310             {
311                 parendepth = 1;
312                 oldstate = BARE_ADDRESS;
313                 state = INSIDE_PARENS;    
314             }
315             else if (*hp == '<')        /* beginning of real address */
316             {
317                 state = INSIDE_BRACKETS;
318                 tp = 0;
319             }
320             else if (*hp == '"')        /* quoted word, copy verbatim */
321             {
322                 oldstate = state;
323                 state = INSIDE_DQUOTE;
324                 address[NEXTTP()] = *hp;
325             }
326             else if (!isspace(*hp))     /* just take it, ignoring whitespace */
327                 address[NEXTTP()] = *hp;
328             break;
329
330         case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
331             if (*hp != '"')
332                 address[NEXTTP()] = *hp;
333             else
334             {
335                 address[NEXTTP()] = *hp;
336                 state = oldstate;
337             }
338             break;
339
340         case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
341             if (*hp == '(')
342                 ++parendepth;
343             else if (*hp == ')')
344                 --parendepth;
345             if (parendepth == 0)
346                 state = oldstate;
347             break;
348
349         case INSIDE_BRACKETS:   /* possible <>-enclosed address */
350             if (*hp == '>')     /* end of address */
351             {
352                 address[NEXTTP()] = '\0';
353                 state = SKIP_JUNK;
354                 ++hp;
355                 tp = 0;
356                 return(address);
357             }
358             else if (*hp == '<')        /* nested <> */
359                 tp = 0;
360             else if (*hp == '"')        /* quoted address */
361             {
362                 address[NEXTTP()] = *hp;
363                 oldstate = INSIDE_BRACKETS;
364                 state = INSIDE_DQUOTE;
365             }
366             else                        /* just copy address */
367                 address[NEXTTP()] = *hp;
368             break;
369         }
370     }
371
372     return(NULL);
373 }
374
375 #ifdef MAIN
/*
 * Test-driver helper: with reply set, run the header through reply_hack()
 * and print the rewritten text; otherwise print every address that
 * nxtaddr() extracts from it, one per line.
 */
static void parsebuf(unsigned char *longbuf, int reply)
{
    unsigned char       *addr;

    if (reply)
    {
        reply_hack(longbuf, "HOSTNAME.NET");
        printf("Rewritten buffer: %s", longbuf);
        return;
    }

    /* the first call hands over the header, later calls continue with NULL */
    for (addr = nxtaddr(longbuf);
         addr != (unsigned char *)NULL;
         addr = nxtaddr((unsigned char *)NULL))
    {
        printf("\t-> \"%s\"\n", addr);
    }
}
392
393
394
395 main(int argc, char *argv[])
396 {
397     unsigned char       buf[MSGBUFSIZE], longbuf[BUFSIZ];
398     int                 ch, reply;
399     
400     verbose = reply = FALSE;
401     while ((ch = getopt(argc, argv, "rv")) != EOF)
402         switch(ch)
403         {
404         case 'r':
405             reply = TRUE;
406             break;
407
408         case 'v':
409             verbose = TRUE;
410             break;
411         }
412
413     while (fgets(buf, sizeof(buf)-1, stdin))
414     {
415         if (buf[0] == ' ' || buf[0] == '\t')
416             strcat(longbuf, buf);
417         else if (!strncasecmp("From: ", buf, 6)
418                     || !strncasecmp("To: ", buf, 4)
419                     || !strncasecmp("Reply-", buf, 6)
420                     || !strncasecmp("Cc: ", buf, 4)
421                     || !strncasecmp("Bcc: ", buf, 5))
422             strcpy(longbuf, buf);       
423         else if (longbuf[0])
424         {
425             if (verbose)
426                 fputs(longbuf, stdout);
427             parsebuf(longbuf, reply);
428             longbuf[0] = '\0';
429         }
430     }
431     if (longbuf[0])
432     {
433         if (verbose)
434             fputs(longbuf, stdout);
435         parsebuf(longbuf, reply);
436     }
437 }
438 #endif /* MAIN */
439
440 /* rfc822.c end */