Postfix3.3.1
header_token.c
[詳解]
1 /*++
2 /* NAME
3 /* header_token 3
4 /* SUMMARY
5 /* mail header parser
6 /* SYNOPSIS
7 /* #include <header_token.h>
8 /*
9 /* typedef struct {
10 /* .in +4
11 /* int type;
12 /* const char *u.value;
13 /* /* ... */
14 /* .in
15 /* } HEADER_TOKEN;
16 /*
17 /* ssize_t header_token(token, token_len, token_buffer, ptr,
18 /* specials, terminator)
19 /* HEADER_TOKEN *token;
20 /* ssize_t token_len;
21 /* VSTRING *token_buffer;
22 /* const char **ptr;
23 /* const char *specials;
24 /* int terminator;
25 /* DESCRIPTION
26 /* This module parses a mail header value (text after field-name:)
27 /* into tokens. The parser understands RFC 822 linear white space,
28 /* quoted-string, comment, control characters, and a set of
29 /* user-specified special characters.
30 /*
31 /* A result token type is one of the following:
32 /* .IP HEADER_TOK_QSTRING
33 /* Quoted string as per RFC 822.
34 /* .IP HEADER_TOK_TOKEN
35 /* Token as per RFC 822, and the special characters supplied by the
36 /* caller.
37 /* .IP other
38 /* The value of a control character or special character.
39 /* .PP
40 /* header_token() tokenizes the input and stops after a user-specified
41 /* terminator (ignoring all tokens that exceed the capacity of
42 /* the result storage), or when it runs out of space for the result.
43 /* The terminator is not stored. The result value is the number of
44 /* tokens stored, or -1 when the input was exhausted before any tokens
45 /* were found.
46 /*
47 /* Arguments:
48 /* .IP token
49 /* Result array of HEADER_TOKEN structures. Token string values
50 /* are pointers to null-terminated substrings in the token_buffer.
51 /* .IP token_len
52 /* Length of the array of HEADER_TOKEN structures.
53 /* .IP token_buffer
54 /* Storage for result token string values.
55 /* .IP ptr
56 /* Input/output read position. The input is a null-terminated string.
57 /* .IP specials
58 /* Special characters according to the relevant RFC, or a
59 /* null pointer (default to the RFC 822 special characters).
60 /* This must include the optional terminator if one is specified.
61 /* .IP terminator
62 /* The special character to stop after, or zero.
63 /* BUGS
64 /* Eight-bit characters are not given special treatment.
65 /* SEE ALSO
66 /* RFC 822 (ARPA Internet Text Messages)
67 /* DIAGNOSTICS
68 /* Fatal errors: memory allocation problem.
69 /* LICENSE
70 /* .ad
71 /* .fi
72 /* The Secure Mailer license must be distributed with this software.
73 /* AUTHOR(S)
74 /* Wietse Venema
75 /* IBM T.J. Watson Research
76 /* P.O. Box 704
77 /* Yorktown Heights, NY 10598, USA
78 /*--*/
79 
80 /* System library. */
81 
82 #include <sys_defs.h>
83 #include <string.h>
84 #include <ctype.h>
85 
86 /* Utility library. */
87 
88 #include <msg.h>
89 #include <vstring.h>
90 
91 /* Global library. */
92 
93 #include <lex_822.h>
94 #include <header_token.h>
95 
96 /* Application-specific. */
97 
98  /*
99  * Silly little macros.
100  */
#define STR(vp)		vstring_str(vp)	/* shorthand for vstring_str() */
#define LEN(vp)		VSTRING_LEN(vp)	/* shorthand for VSTRING_LEN() */
#define CU_CHAR_PTR(p)	((const unsigned char *) (p))	/* view as unsigned bytes */
104 
105 /* header_token - parse out the next item in a message header */
106 
107 ssize_t header_token(HEADER_TOKEN *token, ssize_t token_len,
108  VSTRING *token_buffer, const char **ptr,
109  const char *user_specials, int user_terminator)
110 {
111  ssize_t comment_level;
112  const unsigned char *cp;
113  ssize_t len;
114  int ch;
115  ssize_t tok_count;
116  ssize_t n;
117 
118  /*
119  * Initialize.
120  */
121  VSTRING_RESET(token_buffer);
122  cp = CU_CHAR_PTR(*ptr);
123  tok_count = 0;
124  if (user_specials == 0)
125  user_specials = LEX_822_SPECIALS;
126 
127  /*
128  * Main parsing loop.
129  *
130  * XXX What was the reason to continue parsing when user_terminator is
131  * specified? Perhaps this was needed at some intermediate stage of
132  * development?
133  */
134  while ((ch = *cp) != 0 && (user_terminator != 0 || tok_count < token_len)) {
135  cp++;
136 
137  /*
138  * Skip RFC 822 linear white space.
139  */
140  if (IS_SPACE_TAB_CR_LF(ch))
141  continue;
142 
143  /*
144  * Terminator.
145  */
146  if (ch == user_terminator)
147  break;
148 
149  /*
150  * Skip RFC 822 comment.
151  */
152  if (ch == '(') {
153  comment_level = 1;
154  while ((ch = *cp) != 0) {
155  cp++;
156  if (ch == '(') { /* comments can nest! */
157  comment_level++;
158  } else if (ch == ')') {
159  if (--comment_level == 0)
160  break;
161  } else if (ch == '\\') {
162  if ((ch = *cp) == 0)
163  break;
164  cp++;
165  }
166  }
167  continue;
168  }
169 
170  /*
171  * Copy quoted text according to RFC 822.
172  */
173  if (ch == '"') {
174  if (tok_count < token_len) {
175  token[tok_count].u.offset = LEN(token_buffer);
176  token[tok_count].type = HEADER_TOK_QSTRING;
177  }
178  while ((ch = *cp) != 0) {
179  cp++;
180  if (ch == '"')
181  break;
182  if (ch == '\n') { /* unfold */
183  if (tok_count < token_len) {
184  len = LEN(token_buffer);
185  while (len > 0
186  && IS_SPACE_TAB_CR_LF(STR(token_buffer)[len - 1]))
187  len--;
188  if (len < LEN(token_buffer))
189  vstring_truncate(token_buffer, len);
190  }
191  continue;
192  }
193  if (ch == '\\') {
194  if ((ch = *cp) == 0)
195  break;
196  cp++;
197  }
198  if (tok_count < token_len)
199  VSTRING_ADDCH(token_buffer, ch);
200  }
201  if (tok_count < token_len) {
202  VSTRING_ADDCH(token_buffer, 0);
203  tok_count++;
204  }
205  continue;
206  }
207 
208  /*
209  * Control, or special.
210  */
211  if (strchr(user_specials, ch) || ISCNTRL(ch)) {
212  if (tok_count < token_len) {
213  token[tok_count].u.offset = LEN(token_buffer);
214  token[tok_count].type = ch;
215  VSTRING_ADDCH(token_buffer, ch);
216  VSTRING_ADDCH(token_buffer, 0);
217  tok_count++;
218  }
219  continue;
220  }
221 
222  /*
223  * Token.
224  */
225  else {
226  if (tok_count < token_len) {
227  token[tok_count].u.offset = LEN(token_buffer);
228  token[tok_count].type = HEADER_TOK_TOKEN;
229  VSTRING_ADDCH(token_buffer, ch);
230  }
231  while ((ch = *cp) != 0 && !IS_SPACE_TAB_CR_LF(ch)
232  && !ISCNTRL(ch) && !strchr(user_specials, ch)) {
233  cp++;
234  if (tok_count < token_len)
235  VSTRING_ADDCH(token_buffer, ch);
236  }
237  if (tok_count < token_len) {
238  VSTRING_ADDCH(token_buffer, 0);
239  tok_count++;
240  }
241  continue;
242  }
243  }
244 
245  /*
246  * Ignore a zero-length item after the last terminator.
247  */
248  if (tok_count == 0 && ch == 0)
249  return (-1);
250 
251  /*
252  * Finalize. Fill in the string pointer array, now that the token buffer
253  * is no longer dynamically reallocated as it grows.
254  */
255  *ptr = (const char *) cp;
256  for (n = 0; n < tok_count; n++)
257  token[n].u.value = STR(token_buffer) + token[n].u.offset;
258 
259  if (msg_verbose)
260  msg_info("header_token: %s %s %s",
261  tok_count > 0 ? token[0].u.value : "",
262  tok_count > 1 ? token[1].u.value : "",
263  tok_count > 2 ? token[2].u.value : "");
264 
265  return (tok_count);
266 }
int msg_verbose
Definition: msg.c:177
VSTRING * vstring_truncate(VSTRING *vp, ssize_t len)
Definition: vstring.c:415
#define LEX_822_SPECIALS
Definition: lex_822.h:23
#define HEADER_TOK_TOKEN
Definition: header_token.h:31
#define IS_SPACE_TAB_CR_LF(ch)
Definition: lex_822.h:18
#define VSTRING_ADDCH(vp, ch)
Definition: vstring.h:81
#define HEADER_TOK_QSTRING
Definition: header_token.h:32
#define STR(x)
Definition: header_token.c:101
#define CU_CHAR_PTR(x)
Definition: header_token.c:103
#define VSTRING_RESET(vp)
Definition: vstring.h:77
union HEADER_TOKEN::@0 u
ssize_t header_token(HEADER_TOKEN *token, ssize_t token_len, VSTRING *token_buffer, const char **ptr, const char *user_specials, int user_terminator)
Definition: header_token.c:107
#define LEN(x)
Definition: header_token.c:102
#define ISCNTRL(c)
Definition: sys_defs.h:1747
const char * value
Definition: header_token.h:26
ssize_t offset
Definition: header_token.h:27
void msg_info(const char *fmt,...)
Definition: msg.c:199