Pileus Git - ~andy/fetchmail/blob - rfc822.c
9b574b55023c839d031d9ace7cf8f8eaf27ba720
[~andy/fetchmail] / rfc822.c
1 /*
2  * rfc822.c -- code for slicing and dicing RFC822 mail headers
3  *
4  * Copyright 1997 by Eric S. Raymond
5  * For license terms, see the file COPYING in this directory.
6  */
7
8 #include  <stdio.h>
9 #include  <ctype.h>
10 #include  <string.h>
11 #if defined(STDC_HEADERS)
12 #include  <stdlib.h>
13 #endif
14
15 #include "fetchmail.h"
16
/*
 * True when p points at a newline that is NOT followed by a space or tab,
 * i.e. the line break is a real end of header rather than the start of a
 * continuation line.  Note that p is evaluated more than once.
 */
#define HEADER_END(p)   ((p)[0] == '\n' && (p)[1] != ' ' && (p)[1] != '\t')

#ifdef TESTMAIN
/* globals needed only by the standalone test driver at the end of the file */
char *program_name = "rfc822";
static int verbose;     /* nonzero: both parsers trace their state to stdout */
#endif /* TESTMAIN */
23
24 char *reply_hack(buf, host)
25 /* hack message headers so replies will work properly */
26 char *buf;              /* header to be hacked */
27 const char *host;       /* server hostname */
28 {
29     char *from, *cp, last_nws = '\0', *parens_from = NULL;
30     int parendepth, state, has_bare_name_part, has_host_part;
31     int addresscount = 1;
32
33     if (strncasecmp("From: ", buf, 6)
34         && strncasecmp("To: ", buf, 4)
35         && strncasecmp("Reply-To: ", buf, 10)
36         && strncasecmp("Return-Path: ", buf, 13)
37         && strncasecmp("Cc: ", buf, 4)
38         && strncasecmp("Bcc: ", buf, 5)
39         && strncasecmp("Resent-From: ", buf, 13)
40         && strncasecmp("Resent-To: ", buf, 11)
41         && strncasecmp("Resent-Cc: ", buf, 11)
42         && strncasecmp("Resent-Bcc: ", buf, 12)
43         && strncasecmp("Apparently-From:", buf, 16)
44         && strncasecmp("Apparently-To:", buf, 14)
45         && strncasecmp("Sender:", buf, 7)
46         && strncasecmp("Resent-Sender:", buf, 14)
47        ) {
48         return(buf);
49     }
50
51 #ifndef TESTMAIN
52     /* make room to hack the address; buf must be malloced */
53     for (cp = buf; *cp; cp++)
54         if (*cp == ',' || isspace(*cp))
55             addresscount++;
56     buf = (char *)xrealloc(buf, strlen(buf) + addresscount * strlen(host) + 1);
57 #endif /* TESTMAIN */
58
59     /*
60      * This is going to foo up on some ill-formed addresses.
61      * Note that we don't rewrite the fake address <> in order to
62      * avoid screwing up bounce suppression with a null Return-Path.
63      */
64
65     parendepth = state = 0;
66     has_host_part = has_bare_name_part = FALSE;
67     for (from = buf; *from; from++)
68     {
69 #ifdef TESTMAIN
70         if (verbose)
71         {
72             printf("state %d: %s", state, buf);
73             printf("%*s^\n", from - buf + 10, " ");
74         }
75 #endif /* TESTMAIN */
76         if (state != 2)
77             if (*from == '(')
78                 ++parendepth;
79             else if (*from == ')')
80                 --parendepth;
81
82         if (!parendepth && !has_host_part)
83             switch (state)
84             {
85             case 0:     /* before header colon */
86                 if (*from == ':')
87                     state = 1;
88                 break;
89
90             case 1:     /* we've seen the colon, we're looking for addresses */
91                 if (!isspace(*from))
92                     last_nws = *from;
93                 if (*from == '<')
94                     state = 3;
95                 else if (*from == '@')
96                     has_host_part = TRUE;
97                 else if (*from == '"')
98                     state = 2;
99                 /*
100                  * Not expanding on last non-WS == ';' deals with groupnames,
101                  * an obscure misfeature described in sections
102                  * 6.1, 6.2.6, and A.1.5 of the RFC822 standard.
103                  */
104                 else if ((*from == ',' || HEADER_END(from))
105                          && has_bare_name_part
106                          && !has_host_part
107                          && last_nws != ';')
108                 {
109                     int hostlen;
110                     char *p;
111
112                     p = from;
113                     if (parens_from)
114                         from = parens_from;
115                     while (isspace(*from) || (*from == ','))
116                         --from;
117                     from++;
118                     hostlen = strlen(host);
119                     for (cp = from + strlen(from); cp >= from; --cp)
120                         cp[hostlen+1] = *cp;
121                     *from++ = '@';
122                     memcpy(from, host, hostlen);
123                     from = p + hostlen + 1;
124                     has_host_part = TRUE;
125                 } 
126                 else if (from[1] == '('
127                          && has_bare_name_part
128                          && !has_host_part
129                          && last_nws != ';' && last_nws != ')')
130                 {
131                     parens_from = from;
132                 } 
133                 else if (!isspace(*from))
134                     has_bare_name_part = TRUE;
135                 break;
136
137             case 2:     /* we're in a string */
138                 if (*from == '"')
139                     state = 1;
140                 break;
141
142             case 3:     /* we're in a <>-enclosed address */
143                 if (*from == '@')
144                     has_host_part = TRUE;
145                 else if (*from == '>' && from[-1] != '<')
146                 {
147                     state = 1;
148                     if (!has_host_part)
149                     {
150                         int hostlen;
151
152                         hostlen = strlen(host);
153                         for (cp = from + strlen(from); cp >= from; --cp)
154                             cp[hostlen+1] = *cp;
155                         *from++ = '@';
156                         memcpy(from, host, hostlen);
157                         from += hostlen;
158                         has_host_part = TRUE;
159                     }
160                 }
161                 break;
162             }
163
164         /*
165          * If we passed a comma, reset everything.
166          */
167         if (from[-1] == ',' && !parendepth) {
168           has_host_part = has_bare_name_part = FALSE;
169           parens_from = NULL;
170         }
171     }
172
173     return(buf);
174 }
175
176 char *nxtaddr(hdr)
177 /* parse addresses in succession out of a specified RFC822 header */
178 const char *hdr;        /* header to be parsed, NUL to continue previous hdr */
179 {
180     static char *tp, address[POPBUFSIZE+1];
181     static const char *hp;
182     static int  state, oldstate;
183 #ifdef TESTMAIN
184     static const char *orighdr;
185 #endif /* TESTMAIN */
186     int parendepth = 0;
187
188 #define START_HDR       0       /* before header colon */
189 #define SKIP_JUNK       1       /* skip whitespace, \n, and junk */
190 #define BARE_ADDRESS    2       /* collecting address without delimiters */
191 #define INSIDE_DQUOTE   3       /* inside double quotes */
192 #define INSIDE_PARENS   4       /* inside parentheses */
193 #define INSIDE_BRACKETS 5       /* inside bracketed address */
194 #define ENDIT_ALL       6       /* after last address */
195
196     if (hdr)
197     {
198         hp = hdr;
199         state = START_HDR;
200 #ifdef TESTMAIN
201         orighdr = hdr;
202 #endif /* TESTMAIN */
203         tp = address;
204     }
205
206     for (; *hp; hp++)
207     {
208 #ifdef TESTMAIN
209         if (verbose)
210         {
211             printf("state %d: %s", state, orighdr);
212             printf("%*s^\n", hp - orighdr + 10, " ");
213         }
214 #endif /* TESTMAIN */
215
216         if (state == ENDIT_ALL)         /* after last address */
217             return(NULL);
218         else if (HEADER_END(hp))
219         {
220             state = ENDIT_ALL;
221             if (tp > address)
222             {
223                 while (isspace(*--tp))
224                     continue;
225                 *++tp = '\0';
226             }
227             return(tp > address ? (tp = address) : (char *)NULL);
228         }
229         else if (*hp == '\\')           /* handle RFC822 escaping */
230         {
231             if (state != INSIDE_PARENS)
232             {
233                 *tp++ = *hp++;                  /* take the escape */
234                 *tp++ = *hp;                    /* take following char */
235             }
236         }
237         else switch (state)
238         {
239         case START_HDR:   /* before header colon */
240             if (*hp == ':')
241                 state = SKIP_JUNK;
242             break;
243
244         case SKIP_JUNK:         /* looking for address start */
245             if (*hp == '"')     /* quoted string */
246             {
247                 oldstate = SKIP_JUNK;
248                 state = INSIDE_DQUOTE;
249                 *tp++ = *hp;
250             }
251             else if (*hp == '(')        /* address comment -- ignore */
252             {
253                 parendepth = 1;
254                 oldstate = SKIP_JUNK;
255                 state = INSIDE_PARENS;    
256             }
257             else if (*hp == '<')        /* begin <address> */
258             {
259                 state = INSIDE_BRACKETS;
260                 tp = address;
261             }
262             else if (*hp != ',' && !isspace(*hp))
263             {
264                 --hp;
265                 state = BARE_ADDRESS;
266             }
267             break;
268
269         case BARE_ADDRESS:      /* collecting address without delimiters */
270             if (*hp == ',')     /* end of address */
271             {
272                 if (tp > address)
273                 {
274                     *tp++ = '\0';
275                     state = SKIP_JUNK;
276                     return(tp = address);
277                 }
278             }
279             else if (*hp == '(')        /* beginning of comment */
280             {
281                 parendepth = 1;
282                 oldstate = BARE_ADDRESS;
283                 state = INSIDE_PARENS;    
284             }
285             else if (*hp == '<')        /* beginning of real address */
286             {
287                 state = INSIDE_BRACKETS;
288                 tp = address;
289             }
290             else if (!isspace(*hp))     /* just take it, ignoring whitespace */
291                 *tp++ = *hp;
292             break;
293
294         case INSIDE_DQUOTE:     /* we're in a quoted string, copy verbatim */
295             if (*hp != '"')
296                 *tp++ = *hp;
297             else
298             {
299                 *tp++ = *hp;
300                 state = oldstate;
301             }
302             break;
303
304         case INSIDE_PARENS:     /* we're in a parenthesized comment, ignore */
305             if (*hp == '(')
306                 ++parendepth;
307             else if (*hp == ')')
308                 --parendepth;
309             if (parendepth == 0)
310                 state = oldstate;
311             break;
312
313         case INSIDE_BRACKETS:   /* possible <>-enclosed address */
314             if (*hp == '>')     /* end of address */
315             {
316                 *tp++ = '\0';
317                 state = SKIP_JUNK;
318                 ++hp;
319                 return(tp = address);
320             }
321             else if (*hp == '<')        /* nested <> */
322                 tp = address;
323             else if (*hp == '"')        /* quoted address */
324             {
325                 *tp++ = *hp;
326                 oldstate = INSIDE_BRACKETS;
327                 state = INSIDE_DQUOTE;
328             }
329             else                        /* just copy address */
330                 *tp++ = *hp;
331             break;
332         }
333     }
334
335     return(NULL);
336 }
337
338 #ifdef TESTMAIN
/*
 * Test-driver helper: run one accumulated header through the code under
 * test and print the result.  With reply set the header is rewritten by
 * reply_hack() using the dummy host "HOSTNAME.NET"; otherwise every
 * address that nxtaddr() extracts is listed on its own line.
 */
static void parsebuf(char *longbuf, int reply)
{
    char        *addr;

    if (reply)
    {
        reply_hack(longbuf, "HOSTNAME.NET");
        printf("Rewritten buffer: %s", longbuf);
        return;
    }

    /* first call hands over the header, later calls continue from it */
    for (addr = nxtaddr(longbuf);
         addr != (char *)NULL;
         addr = nxtaddr((char *)NULL))
    {
        printf("\t-> \"%s\"\n", addr);
    }
}
355
356
357
358 main(int argc, char *argv[])
359 {
360     char        buf[MSGBUFSIZE], longbuf[BUFSIZ];
361     int         ch, reply;
362     
363     verbose = reply = FALSE;
364     while ((ch = getopt(argc, argv, "rv")) != EOF)
365         switch(ch)
366         {
367         case 'r':
368             reply = TRUE;
369             break;
370
371         case 'v':
372             verbose = TRUE;
373             break;
374         }
375
376     while (fgets(buf, sizeof(buf)-1, stdin))
377     {
378         if (buf[0] == ' ' || buf[0] == '\t')
379             strcat(longbuf, buf);
380         else if (!strncasecmp("From: ", buf, 6)
381                     || !strncasecmp("To: ", buf, 4)
382                     || !strncasecmp("Reply-", buf, 6)
383                     || !strncasecmp("Cc: ", buf, 4)
384                     || !strncasecmp("Bcc: ", buf, 5))
385             strcpy(longbuf, buf);       
386         else if (longbuf[0])
387         {
388             if (verbose)
389                 fputs(longbuf, stdout);
390             parsebuf(longbuf, reply);
391             longbuf[0] = '\0';
392         }
393     }
394     if (longbuf[0])
395     {
396         if (verbose)
397             fputs(longbuf, stdout);
398         parsebuf(longbuf, reply);
399     }
400 }
401 #endif /* TESTMAIN */
402
403 /* rfc822.c end */