Postfix3.3.1
tok822_parse.c
[詳解]
1 /*++
2 /* NAME
3 /* tok822_parse 3
4 /* SUMMARY
5 /* RFC 822 address parser
6 /* SYNOPSIS
7 /* #include <tok822.h>
8 /*
9 /* TOK822 *tok822_scan_limit(str, tailp, limit)
10 /* const char *str;
11 /* TOK822 **tailp;
12 /* int limit;
13 /*
14 /* TOK822 *tok822_scan(str, tailp)
15 /* const char *str;
16 /* TOK822 **tailp;
17 /*
18 /* TOK822 *tok822_parse_limit(str, limit)
19 /* const char *str;
20 /* int limit;
21 /*
22 /* TOK822 *tok822_parse(str)
23 /* const char *str;
24 /*
25 /* TOK822 *tok822_scan_addr(str)
26 /* const char *str;
27 /*
28 /* VSTRING *tok822_externalize(buffer, tree, flags)
29 /* VSTRING *buffer;
30 /* TOK822 *tree;
31 /* int flags;
32 /*
33 /* VSTRING *tok822_internalize(buffer, tree, flags)
34 /* VSTRING *buffer;
35 /* TOK822 *tree;
36 /* int flags;
37 /* DESCRIPTION
38 /* This module converts address lists between string form and parse
39 /* tree formats. The string form can appear in two different ways:
40 /* external (or quoted) form, as used in message headers, and internal
41 /* (unquoted) form, as used internally by the mail software.
42 /* Although RFC 822 expects 7-bit data, these routines pay no
43 /* special attention to 8-bit characters.
44 /*
45 /* tok822_scan() converts the external-form string in \fIstr\fR
46 /* to a linear token list. The \fItailp\fR argument is a null pointer
47 /* or receives the pointer value of the last result list element.
48 /*
49 /* tok822_scan_limit() implements tok822_scan(), which is a macro.
50 /* The \fIlimit\fR argument is either zero or an upper bound on the
51 /* number of tokens produced.
52 /*
53 /* tok822_parse() converts the external-form address list in
54 /* \fIstr\fR to the corresponding token tree. The parser is permissive
55 /* and will not throw away information that it does not understand.
56 /* The parser adds missing commas between addresses.
57 /*
58 /* tok822_parse_limit() implements tok822_parse(), which is a macro.
59 /* The \fIlimit\fR argument is either zero or an upper bound on the
60 /* number of tokens produced.
61 /*
62 /* tok822_scan_addr() converts the external-form string in
63 /* \fIstr\fR to an address token tree. This is just string to
64 /* token list conversion; no parsing is done. This routine is
65 /* suitable for data that should contain just one address and no
66 /* other information.
67 /*
68 /* tok822_externalize() converts a token list to external form.
69 /* Where appropriate, characters and strings are quoted and white
70 /* space is inserted. The \fIflags\fR argument is the binary OR of
71 /* zero or more of the following:
72 /* .IP TOK822_STR_WIPE
73 /* Initially, truncate the result to zero length.
74 /* .IP TOK822_STR_TERM
75 /* Append a null terminator to the result when done.
76 /* .IP TOK822_STR_LINE
77 /* Append a line break after each comma token, instead of appending
78 /* whitespace. It is up to the caller to concatenate short lines to
79 /* produce longer ones.
80 /* .IP TOK822_STR_TRNC
81 /* Truncate non-address information to 250 characters per address, to
82 /* protect Sendmail systems that are vulnerable to the problem in CERT
83 /* advisory CA-2003-07.
84 /* This flag has effect with tok822_externalize() only.
85 /* .PP
86 /* The macro TOK822_STR_NONE expresses that none of the above features
87 /* should be activated.
88 /*
89 /* The macro TOK822_STR_DEFL combines the TOK822_STR_WIPE and
90 /* TOK822_STR_TERM flags. This is useful for most token to string
91 /* conversions.
92 /*
93 /* The macro TOK822_STR_HEAD combines the TOK822_STR_TERM,
94 /* TOK822_STR_LINE and TOK822_STR_TRNC flags. This is useful for
95 /* the special case of token to mail header conversion.
96 /*
97 /* tok822_internalize() converts a token list to string form,
98 /* without quoting. White space is inserted where appropriate.
99 /* The \fIflags\fR argument is as with tok822_externalize().
100 /* STANDARDS
101 /* .ad
102 /* .fi
103 /* RFC 822 (ARPA Internet Text Messages). In addition to this standard
104 /* this module implements additional operators such as % and !. These
105 /* are needed because the real world is not all RFC 822. Also, the ':'
106 /* operator is allowed to appear inside addresses, to accommodate DECnet.
107 /* In addition, 8-bit data is not given special treatment.
108 /* LICENSE
109 /* .ad
110 /* .fi
111 /* The Secure Mailer license must be distributed with this software.
112 /* AUTHOR(S)
113 /* Wietse Venema
114 /* IBM T.J. Watson Research
115 /* P.O. Box 704
116 /* Yorktown Heights, NY 10598, USA
117 /*--*/
118 
119 /* System library. */
120 
121 #include <sys_defs.h>
122 #include <ctype.h>
123 #include <string.h>
124 
125 /* Utility library. */
126 
127 #include <vstring.h>
128 #include <msg.h>
129 #include <stringops.h>
130 
131 /* Global library. */
132 
133 #include "lex_822.h"
134 #include "quote_822_local.h"
135 #include "tok822.h"
136 
137  /*
138  * I suppose this is my favorite macro. Used heavily for tokenizing.
139  */
140 #define COLLECT(t,s,c,cond) { \
141  while ((c = *(unsigned char *) s) != 0) { \
142  if (c == '\\') { \
143  if ((c = *(unsigned char *)++s) == 0) \
144  break; \
145  } else if (!(cond)) { \
146  break; \
147  } \
148  VSTRING_ADDCH(t->vstr, IS_SPACE_TAB_CR_LF(c) ? ' ' : c); \
149  s++; \
150  } \
151  VSTRING_TERMINATE(t->vstr); \
152  }
153 
154 #define COLLECT_SKIP_LAST(t,s,c,cond) { COLLECT(t,s,c,cond); if (*s) s++; }
155 
156  /*
157  * Not quite as complex. The parser depends heavily on it.
158  */
159 #define SKIP(tp, cond) { \
160  while (tp->type && (cond)) \
161  tp = tp->prev; \
162  }
163 
164 #define MOVE_COMMENT_AND_CONTINUE(tp, right) { \
165  TOK822 *prev = tok822_unlink(tp); \
166  right = tok822_prepend(right, tp); \
167  tp = prev; \
168  continue; \
169  }
170 
171 #define SKIP_MOVE_COMMENT(tp, cond, right) { \
172  while (tp->type && (cond)) { \
173  if (tp->type == TOK822_COMMENT) \
174  MOVE_COMMENT_AND_CONTINUE(tp, right); \
175  tp = tp->prev; \
176  } \
177  }
178 
179  /*
180  * Single-character operators. We include the % and ! operators because not
181  * all the world is RFC822. XXX Make this operator list configurable when we
182  * have a real rewriting language. Include | for aliases file parsing.
183  */
184 static char tok822_opchar[] = "|%!" LEX_822_SPECIALS;
185 static void tok822_quote_atom(TOK822 *);
186 static const char *tok822_comment(TOK822 *, const char *);
187 static TOK822 *tok822_group(int, TOK822 *, TOK822 *, int);
188 static void tok822_copy_quoted(VSTRING *, char *, char *);
189 static int tok822_append_space(TOK822 *);
190 
191 #define DO_WORD (1<<0) /* finding a word is ok here */
192 #define DO_GROUP (1<<1) /* doing an address group */
193 
194 #define ADD_COMMA ',' /* resynchronize */
195 #define NO_MISSING_COMMA 0
196 
197 /* tok822_internalize - token tree to string, internal form */
198 
199 VSTRING *tok822_internalize(VSTRING *vp, TOK822 *tree, int flags)
200 {
201  TOK822 *tp;
202 
203  if (flags & TOK822_STR_WIPE)
204  VSTRING_RESET(vp);
205 
206  for (tp = tree; tp; tp = tp->next) {
207  switch (tp->type) {
208  case ',':
209  VSTRING_ADDCH(vp, tp->type);
210  if (flags & TOK822_STR_LINE) {
211  VSTRING_ADDCH(vp, '\n');
212  continue;
213  }
214  break;
215  case TOK822_ADDR:
217  break;
218  case TOK822_COMMENT:
219  case TOK822_ATOM:
220  case TOK822_QSTRING:
221  vstring_strcat(vp, vstring_str(tp->vstr));
222  break;
223  case TOK822_DOMLIT:
224  VSTRING_ADDCH(vp, '[');
225  vstring_strcat(vp, vstring_str(tp->vstr));
226  VSTRING_ADDCH(vp, ']');
227  break;
228  case TOK822_STARTGRP:
229  VSTRING_ADDCH(vp, ':');
230  break;
231  default:
232  if (tp->type >= TOK822_MINTOK)
233  msg_panic("tok822_internalize: unknown operator %d", tp->type);
234  VSTRING_ADDCH(vp, tp->type);
235  }
236  if (tok822_append_space(tp))
237  VSTRING_ADDCH(vp, ' ');
238  }
239  if (flags & TOK822_STR_TERM)
240  VSTRING_TERMINATE(vp);
241  return (vp);
242 }
243 
244 /* strip_address - strip non-address text from address expression */
245 
246 static void strip_address(VSTRING *vp, ssize_t start, TOK822 *addr)
247 {
248  VSTRING *tmp;
249 
250  /*
251  * Emit plain <address>. Discard any comments or phrases.
252  */
253  VSTRING_TERMINATE(vp);
254  msg_warn("stripping too many comments from address: %.100s...",
255  printable(vstring_str(vp) + start, '?'));
256  vstring_truncate(vp, start);
257  VSTRING_ADDCH(vp, '<');
258  if (addr) {
259  tmp = vstring_alloc(100);
263  vstring_free(tmp);
264  }
265  VSTRING_ADDCH(vp, '>');
266 }
267 
268 /* tok822_externalize - token tree to string, external form */
269 
270 VSTRING *tok822_externalize(VSTRING *vp, TOK822 *tree, int flags)
271 {
272  VSTRING *tmp;
273  TOK822 *tp;
274  ssize_t start;
275  TOK822 *addr;
276  ssize_t addr_len;
277 
278  /*
279  * Guard against a Sendmail buffer overflow (CERT advisory CA-2003-07).
280  * The problem was that Sendmail could store too much non-address text
281  * (comments, phrases, etc.) into a static 256-byte buffer.
282  *
283  * When the buffer fills up, fixed Sendmail versions remove comments etc.
284  * and reduce the information to just <$g>, which expands to <address>.
285  * No change is made when an address expression (text separated by
286  * commas) contains no address. This fix reportedly also protects
287  * Sendmail systems that are still vulnerable to this problem.
288  *
289  * Postfix takes the same approach, grudgingly. To avoid unnecessary damage,
290  * Postfix removes comments etc. only when the amount of non-address text
291  * in an address expression (text separated by commas) exceeds 250 bytes.
292  *
293  * With Sendmail, the address part of an address expression is the
294  * right-most <> instance in that expression. If an address expression
295  * contains no <>, then Postfix guarantees that it contains at most one
296  * non-comment string; that string is the address part of the address
297  * expression, so there is no ambiguity.
298  *
299  * Finally, we note that stress testing shows that other code in Sendmail
300  * 8.12.8 bluntly truncates ``text <address>'' to 256 bytes even when
301  * this means chopping the <address> somewhere in the middle. This is a
302  * loss of control that we're not entirely comfortable with. However,
303  * unbalanced quotes and dangling backslash do not seem to influence the
304  * way that Sendmail parses headers, so this is not an urgent problem.
305  */
306 #define MAX_NONADDR_LENGTH 250
307 
308 #define RESET_NONADDR_LENGTH { \
309  start = VSTRING_LEN(vp); \
310  addr = 0; \
311  addr_len = 0; \
312  }
313 
314 #define ENFORCE_NONADDR_LENGTH do { \
315  if (addr && VSTRING_LEN(vp) - addr_len > start + MAX_NONADDR_LENGTH) \
316  strip_address(vp, start, addr->head); \
317  } while(0)
318 
319  if (flags & TOK822_STR_WIPE)
320  VSTRING_RESET(vp);
321 
322  if (flags & TOK822_STR_TRNC)
324 
325  for (tp = tree; tp; tp = tp->next) {
326  switch (tp->type) {
327  case ',':
328  if (flags & TOK822_STR_TRNC)
330  VSTRING_ADDCH(vp, tp->type);
331  VSTRING_ADDCH(vp, (flags & TOK822_STR_LINE) ? '\n' : ' ');
332  if (flags & TOK822_STR_TRNC)
334  continue;
335 
336  /*
337  * XXX In order to correctly externalize an address, it is not
338  * sufficient to quote individual atoms. There are higher-level
339  * rules that say when an address localpart needs to be quoted.
340  * We wing it with the quote_822_local() routine, which ignores
341  * the issue of atoms in the domain part that would need quoting.
342  */
343  case TOK822_ADDR:
344  addr = tp;
345  tmp = vstring_alloc(100);
347  addr_len = VSTRING_LEN(vp);
350  addr_len = VSTRING_LEN(vp) - addr_len;
351  vstring_free(tmp);
352  break;
353  case TOK822_ATOM:
354  case TOK822_COMMENT:
355  vstring_strcat(vp, vstring_str(tp->vstr));
356  break;
357  case TOK822_QSTRING:
358  VSTRING_ADDCH(vp, '"');
359  tok822_copy_quoted(vp, vstring_str(tp->vstr), "\"\\\r\n");
360  VSTRING_ADDCH(vp, '"');
361  break;
362  case TOK822_DOMLIT:
363  VSTRING_ADDCH(vp, '[');
364  tok822_copy_quoted(vp, vstring_str(tp->vstr), "\\\r\n");
365  VSTRING_ADDCH(vp, ']');
366  break;
367  case TOK822_STARTGRP:
368  VSTRING_ADDCH(vp, ':');
369  break;
370  case '<':
371  if (tp->next && tp->next->type == '>') {
372  addr = tp;
373  addr_len = 0;
374  }
375  VSTRING_ADDCH(vp, '<');
376  break;
377  default:
378  if (tp->type >= TOK822_MINTOK)
379  msg_panic("tok822_externalize: unknown operator %d", tp->type);
380  VSTRING_ADDCH(vp, tp->type);
381  }
382  if (tok822_append_space(tp))
383  VSTRING_ADDCH(vp, ' ');
384  }
385  if (flags & TOK822_STR_TRNC)
387 
388  if (flags & TOK822_STR_TERM)
389  VSTRING_TERMINATE(vp);
390  return (vp);
391 }
392 
393 /* tok822_copy_quoted - copy a string while quoting */
394 
395 static void tok822_copy_quoted(VSTRING *vp, char *str, char *quote_set)
396 {
397  int ch;
398 
399  while ((ch = *(unsigned char *) str++) != 0) {
400  if (strchr(quote_set, ch))
401  VSTRING_ADDCH(vp, '\\');
402  VSTRING_ADDCH(vp, ch);
403  }
404 }
405 
406 /* tok822_append_space - see if space is needed after this token */
407 
408 static int tok822_append_space(TOK822 *tp)
409 {
410  TOK822 *next;
411 
412  if (tp == 0 || (next = tp->next) == 0 || tp->owner != 0)
413  return (0);
414  if (tp->type == ',' || tp->type == TOK822_STARTGRP || next->type == '<')
415  return (1);
416 
417 #define NON_OPERATOR(x) \
418  (x->type == TOK822_ATOM || x->type == TOK822_QSTRING \
419  || x->type == TOK822_COMMENT || x->type == TOK822_DOMLIT \
420  || x->type == TOK822_ADDR)
421 
422  return (NON_OPERATOR(tp) && NON_OPERATOR(next));
423 }
424 
425 /* tok822_scan_limit - tokenize string */
426 
427 TOK822 *tok822_scan_limit(const char *str, TOK822 **tailp, int tok_count_limit)
428 {
429  TOK822 *head = 0;
430  TOK822 *tail = 0;
431  TOK822 *tp;
432  int ch;
433  int tok_count = 0;
434 
435  /*
436  * XXX 2822 new feature: Section 4.1 allows "." to appear in a phrase (to
437  * allow for forms such as: Johnny B. Goode <johhny@domain.org>. I cannot
438  * handle that at the tokenizer level - it is not context sensitive. And
439  * to fix this at the parser level requires radical changes to preserve
440  * white space as part of the token stream. Thanks a lot, people.
441  */
442  while ((ch = *(unsigned char *) str++) != 0) {
443  if (IS_SPACE_TAB_CR_LF(ch))
444  continue;
445  if (ch == '(') {
446  tp = tok822_alloc(TOK822_COMMENT, (char *) 0);
447  str = tok822_comment(tp, str);
448  } else if (ch == '[') {
449  tp = tok822_alloc(TOK822_DOMLIT, (char *) 0);
450  COLLECT_SKIP_LAST(tp, str, ch, ch != ']');
451  } else if (ch == '"') {
452  tp = tok822_alloc(TOK822_QSTRING, (char *) 0);
453  COLLECT_SKIP_LAST(tp, str, ch, ch != '"');
454  } else if (ch != '\\' && strchr(tok822_opchar, ch)) {
455  tp = tok822_alloc(ch, (char *) 0);
456  } else {
457  tp = tok822_alloc(TOK822_ATOM, (char *) 0);
458  str -= 1; /* \ may be first */
459  COLLECT(tp, str, ch, !IS_SPACE_TAB_CR_LF(ch) && !strchr(tok822_opchar, ch));
460  tok822_quote_atom(tp);
461  }
462  if (head == 0) {
463  head = tail = tp;
464  while (tail->next)
465  tail = tail->next;
466  } else {
467  tail = tok822_append(tail, tp);
468  }
469  if (tok_count_limit > 0 && ++tok_count >= tok_count_limit)
470  break;
471  }
472  if (tailp)
473  *tailp = tail;
474  return (head);
475 }
476 
477 /* tok822_parse_limit - translate external string to token tree */
478 
479 TOK822 *tok822_parse_limit(const char *str, int tok_count_limit)
480 {
481  TOK822 *head;
482  TOK822 *tail;
483  TOK822 *right;
484  TOK822 *first_token;
485  TOK822 *last_token;
486  TOK822 *tp;
487  int state;
488 
489  /*
490  * First, tokenize the string, from left to right. We are not allowed to
491  * throw away any information that we do not understand. With a flat
492  * token list that contains all tokens, we can always convert back to
493  * string form.
494  */
495  if ((first_token = tok822_scan_limit(str, &last_token, tok_count_limit)) == 0)
496  return (0);
497 
498  /*
499  * For convenience, sandwich the token list between two sentinel tokens.
500  */
501 #define GLUE(left,rite) { left->next = rite; rite->prev = left; }
502 
503  head = tok822_alloc(0, (char *) 0);
504  GLUE(head, first_token);
505  tail = tok822_alloc(0, (char *) 0);
506  GLUE(last_token, tail);
507 
508  /*
509  * Next step is to transform the token list into a parse tree. This is
510  * done most conveniently from right to left. If there is something that
511  * we do not understand, just leave it alone, don't throw it away. The
512  * address information that we're looking for sits in-between the current
513  * node (tp) and the one called right. Add missing commas on the fly.
514  */
515  state = DO_WORD;
516  right = tail;
517  tp = tail->prev;
518  while (tp->type) {
519  if (tp->type == TOK822_COMMENT) { /* move comment to the side */
520  MOVE_COMMENT_AND_CONTINUE(tp, right);
521  } else if (tp->type == ';') { /* rh side of named group */
522  right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA);
523  state = DO_GROUP | DO_WORD;
524  } else if (tp->type == ':' && (state & DO_GROUP) != 0) {
525  tp->type = TOK822_STARTGRP;
526  (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
527  SKIP(tp, tp->type != ',');
528  right = tp;
529  continue;
530  } else if (tp->type == '>') { /* rh side of <route> */
531  right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA);
532  SKIP_MOVE_COMMENT(tp, tp->type != '<', right);
533  (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
534  SKIP(tp, tp->type > 0xff || strchr(">;,:", tp->type) == 0);
535  right = tp;
536  state |= DO_WORD;
537  continue;
538  } else if (tp->type == TOK822_ATOM || tp->type == TOK822_QSTRING
539  || tp->type == TOK822_DOMLIT) {
540  if ((state & DO_WORD) == 0)
541  right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA)->next;
542  state &= ~DO_WORD;
543  } else if (tp->type == ',') {
544  right = tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
545  state |= DO_WORD;
546  } else {
547  state |= DO_WORD;
548  }
549  tp = tp->prev;
550  }
551  (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
552 
553  /*
554  * Discard the sentinel tokens on the left and right extremes. Properly
555  * terminate the resulting list.
556  */
557  tp = (head->next != tail ? head->next : 0);
558  tok822_cut_before(head->next);
559  tok822_free(head);
560  tok822_cut_before(tail);
561  tok822_free(tail);
562  return (tp);
563 }
564 
565 /* tok822_quote_atom - see if an atom needs quoting when externalized */
566 
567 static void tok822_quote_atom(TOK822 *tp)
568 {
569  char *cp;
570  int ch;
571 
572  /*
573  * RFC 822 expects 7-bit data. Rather than quoting every 8-bit character
574  * (and still passing it on as 8-bit data) we leave 8-bit data alone.
575  */
576  for (cp = vstring_str(tp->vstr); (ch = *(unsigned char *) cp) != 0; cp++) {
577  if ( /* !ISASCII(ch) || */ ch == ' '
578  || ISCNTRL(ch) || strchr(tok822_opchar, ch)) {
579  tp->type = TOK822_QSTRING;
580  break;
581  }
582  }
583 }
584 
585 /* tok822_comment - tokenize comment */
586 
587 static const char *tok822_comment(TOK822 *tp, const char *str)
588 {
589  int level = 1;
590  int ch;
591 
592  /*
593  * XXX We cheat by storing comments in their external form. Otherwise it
594  * would be a royal pain to preserve \ before (. That would require a
595  * recursive parser; the easy to implement stack-based recursion would be
596  * too expensive.
597  */
598  VSTRING_ADDCH(tp->vstr, '(');
599 
600  while ((ch = *(unsigned char *) str) != 0) {
601  VSTRING_ADDCH(tp->vstr, ch);
602  str++;
603  if (ch == '(') { /* comments can nest! */
604  level++;
605  } else if (ch == ')') {
606  if (--level == 0)
607  break;
608  } else if (ch == '\\') {
609  if ((ch = *(unsigned char *) str) == 0)
610  break;
611  VSTRING_ADDCH(tp->vstr, ch);
612  str++;
613  }
614  }
615  VSTRING_TERMINATE(tp->vstr);
616  return (str);
617 }
618 
619 /* tok822_group - cluster a group of tokens */
620 
621 static TOK822 *tok822_group(int group_type, TOK822 *left, TOK822 *right, int sync_type)
622 {
623  TOK822 *group;
624  TOK822 *sync;
625  TOK822 *first;
626 
627  /*
628  * Cluster the tokens between left and right under their own parse tree
629  * node. Optionally insert a resync token.
630  */
631  if (left != right && (first = left->next) != right) {
632  tok822_cut_before(right);
633  tok822_cut_before(first);
634  group = tok822_alloc(group_type, (char *) 0);
635  tok822_sub_append(group, first);
636  tok822_append(left, group);
637  tok822_append(group, right);
638  if (sync_type) {
639  sync = tok822_alloc(sync_type, (char *) 0);
640  tok822_append(left, sync);
641  }
642  }
643  return (left);
644 }
645 
646 /* tok822_scan_addr - convert external address string to address token */
647 
648 TOK822 *tok822_scan_addr(const char *addr)
649 {
650  TOK822 *tree = tok822_alloc(TOK822_ADDR, (char *) 0);
651 
652  tree->head = tok822_scan(addr, &tree->tail);
653  return (tree);
654 }
655 
656 #ifdef TEST
657 
658 #include <unistd.h>
659 #include <vstream.h>
660 #include <readlline.h>
661 
662 /* tok822_print - display token */
663 
664 static void tok822_print(TOK822 *list, int indent)
665 {
666  TOK822 *tp;
667 
668  for (tp = list; tp; tp = tp->next) {
669  if (tp->type < TOK822_MINTOK) {
670  vstream_printf("%*s %s \"%c\"\n", indent, "", "OP", tp->type);
671  } else if (tp->type == TOK822_ADDR) {
672  vstream_printf("%*s %s\n", indent, "", "address");
673  tok822_print(tp->head, indent + 2);
674  } else if (tp->type == TOK822_STARTGRP) {
675  vstream_printf("%*s %s\n", indent, "", "group \":\"");
676  } else {
677  vstream_printf("%*s %s \"%s\"\n", indent, "",
678  tp->type == TOK822_COMMENT ? "comment" :
679  tp->type == TOK822_ATOM ? "atom" :
680  tp->type == TOK822_QSTRING ? "quoted string" :
681  tp->type == TOK822_DOMLIT ? "domain literal" :
682  tp->type == TOK822_ADDR ? "address" :
683  "unknown\n", vstring_str(tp->vstr));
684  }
685  }
686 }
687 
688 int main(int unused_argc, char **unused_argv)
689 {
690  VSTRING *vp = vstring_alloc(100);
691  TOK822 *list;
692  VSTRING *buf = vstring_alloc(100);
693 
694 #define TEST_TOKEN_LIMIT 20
695 
696  while (readlline(buf, VSTREAM_IN, (int *) 0)) {
697  while (VSTRING_LEN(buf) > 0 && vstring_end(buf)[-1] == '\n') {
698  vstring_end(buf)[-1] = 0;
699  vstring_truncate(buf, VSTRING_LEN(buf) - 1);
700  }
701  if (!isatty(vstream_fileno(VSTREAM_IN)))
702  vstream_printf(">>>%s<<<\n\n", vstring_str(buf));
703  list = tok822_parse_limit(vstring_str(buf), TEST_TOKEN_LIMIT);
704  vstream_printf("Parse tree:\n");
705  tok822_print(list, 0);
706  vstream_printf("\n");
707 
708  vstream_printf("Internalized:\n%s\n\n",
711  vstream_printf("Externalized, no newlines inserted:\n%s\n\n",
715  vstream_printf("Externalized, newlines inserted:\n%s\n\n",
719  tok822_free_tree(list);
720  }
721  vstring_free(vp);
722  vstring_free(buf);
723  return (0);
724 }
725 
726 #endif
#define QUOTE_FLAG_8BITCLEAN
Definition: quote_flags.h:19
TOK822 * tok822_parse_limit(const char *str, int tok_count_limit)
Definition: tok822_parse.c:479
#define TOK822_ADDR
Definition: tok822.h:46
NORETURN msg_panic(const char *fmt,...)
Definition: msg.c:295
#define TOK822_MINTOK
Definition: tok822.h:41
#define SKIP_MOVE_COMMENT(tp, cond, right)
Definition: tok822_parse.c:171
#define vstring_str(vp)
Definition: vstring.h:71
#define VSTREAM_OUT
Definition: vstream.h:67
#define tok822_scan(cp, ptr)
Definition: tok822.h:83
int main(int argc, char **argv)
Definition: anvil.c:1010
#define COLLECT_SKIP_LAST(t, s, c, cond)
Definition: tok822_parse.c:154
#define SKIP(tp, cond)
Definition: tok822_parse.c:159
Definition: tok822.h:27
VSTRING * vstring_truncate(VSTRING *vp, ssize_t len)
Definition: vstring.c:415
TOK822 * tok822_scan_addr(const char *addr)
Definition: tok822_parse.c:648
#define VSTREAM_IN
Definition: vstream.h:66
#define LEX_822_SPECIALS
Definition: lex_822.h:23
#define NON_OPERATOR(x)
#define RESET_NONADDR_LENGTH
#define VSTRING_LEN(vp)
Definition: vstring.h:72
#define MOVE_COMMENT_AND_CONTINUE(tp, right)
Definition: tok822_parse.c:164
#define TOK822_ATOM
Definition: tok822.h:42
TOK822 * tok822_sub_append(TOK822 *, TOK822 *)
Definition: tok822_tree.c:206
TOK822 * tok822_scan_limit(const char *str, TOK822 **tailp, int tok_count_limit)
Definition: tok822_parse.c:427
TOK822 * tok822_free_tree(TOK822 *)
Definition: tok822_tree.c:262
#define VSTRING_TERMINATE(vp)
Definition: vstring.h:74
VSTRING * quote_822_local_flags(VSTRING *dst, const char *mbox, int flags)
#define vstring_end(vp)
Definition: vstring.h:73
#define IS_SPACE_TAB_CR_LF(ch)
Definition: lex_822.h:18
#define TOK822_DOMLIT
Definition: tok822.h:45
#define VSTRING_ADDCH(vp, ch)
Definition: vstring.h:81
#define GLUE(left, rite)
struct TOK822 * owner
Definition: tok822.h:34
#define TOK822_STR_WIPE
Definition: tok822.h:87
VSTRING * vstr
Definition: tok822.h:29
#define TOK822_COMMENT
Definition: tok822.h:44
struct TOK822 * head
Definition: tok822.h:32
int type
Definition: tok822.h:28
VSTREAM * vstream_printf(const char *fmt,...)
Definition: vstream.c:1335
#define VSTRING_RESET(vp)
Definition: vstring.h:77
#define NO_MISSING_COMMA
Definition: tok822_parse.c:195
void msg_warn(const char *fmt,...)
Definition: msg.c:215
#define DO_WORD
Definition: tok822_parse.c:191
VSTRING * vstring_alloc(ssize_t len)
Definition: vstring.c:353
VSTRING * tok822_internalize(VSTRING *vp, TOK822 *tree, int flags)
Definition: tok822_parse.c:199
VSTRING * tok822_externalize(VSTRING *vp, TOK822 *tree, int flags)
Definition: tok822_parse.c:270
#define DO_GROUP
Definition: tok822_parse.c:192
#define TOK822_STR_TERM
Definition: tok822.h:88
#define ADD_COMMA
Definition: tok822_parse.c:194
#define TOK822_STR_NONE
Definition: tok822.h:86
TOK822 * tok822_append(TOK822 *, TOK822 *)
Definition: tok822_tree.c:127
int vstream_fflush(VSTREAM *stream)
Definition: vstream.c:1257
#define TOK822_STR_TRNC
Definition: tok822.h:90
TOK822 * tok822_alloc(int, const char *)
Definition: tok822_node.c:55
#define ISCNTRL(c)
Definition: sys_defs.h:1747
TOK822 * tok822_free(TOK822 *)
Definition: tok822_node.c:73
#define TOK822_STR_DEFL
Definition: tok822.h:91
#define TOK822_STR_LINE
Definition: tok822.h:89
#define TOK822_QSTRING
Definition: tok822.h:43
#define ENFORCE_NONADDR_LENGTH
VSTRING * vstring_free(VSTRING *vp)
Definition: vstring.c:380
#define vstream_fileno(vp)
Definition: vstream.h:115
TOK822 * tok822_cut_before(TOK822 *)
Definition: tok822_tree.c:165
#define COLLECT(t, s, c, cond)
Definition: tok822_parse.c:140
char * printable(char *string, int replacement)
Definition: printable.c:49
#define TOK822_STARTGRP
Definition: tok822.h:47
struct TOK822 * prev
Definition: tok822.h:30
#define QUOTE_FLAG_APPEND
Definition: quote_flags.h:21
VSTRING * vstring_strcat(VSTRING *vp, const char *src)
Definition: vstring.c:459
struct TOK822 * next
Definition: tok822.h:31
struct TOK822 * tail
Definition: tok822.h:33
#define readlline(bp, fp, lp)
Definition: readlline.h:25