Postfix3.3.1
dict_regexp.c
[詳解]
1 /*++
2 /* NAME
3 /* dict_regexp 3
4 /* SUMMARY
5 /* dictionary manager interface to REGEXP regular expression library
6 /* SYNOPSIS
7 /* #include <dict_regexp.h>
8 /*
9 /* DICT *dict_regexp_open(name, dummy, dict_flags)
10 /* const char *name;
11 /* int dummy;
12 /* int dict_flags;
13 /* DESCRIPTION
14 /* dict_regexp_open() opens the named file and compiles the contained
15 /* regular expressions. The result object can be used to match strings
16 /* against the table.
17 /* SEE ALSO
18 /* dict(3) generic dictionary manager
19 /* regexp_table(5) format of Postfix regular expression tables
20 /* AUTHOR(S)
21 /* LaMont Jones
22 /* lamont@hp.com
23 /*
24 /* Based on PCRE dictionary contributed by Andrew McNamara
25 /* andrewm@connect.com.au
26 /* connect.com.au Pty. Ltd.
27 /* Level 3, 213 Miller St
28 /* North Sydney, NSW, Australia
29 /*
30 /* Heavily rewritten by Wietse Venema
31 /* IBM T.J. Watson Research
32 /* P.O. Box 704
33 /* Yorktown Heights, NY 10598, USA
34 /*
35 /* Wietse Venema
36 /* Google, Inc.
37 /* 111 8th Avenue
38 /* New York, NY 10011, USA
39 /*--*/
40 
41 /* System library. */
42 
43 #include "sys_defs.h"
44 
45 #ifdef HAS_POSIX_REGEXP
46 
47 #include <sys/stat.h>
48 #include <stdlib.h>
49 #include <unistd.h>
50 #include <string.h>
51 #include <ctype.h>
52 #include <regex.h>
53 #ifdef STRCASECMP_IN_STRINGS_H
54 #include <strings.h>
55 #endif
56 
57 /* Utility library. */
58 
59 #include "mymalloc.h"
60 #include "msg.h"
61 #include "safe.h"
62 #include "vstream.h"
63 #include "vstring.h"
64 #include "stringops.h"
65 #include "readlline.h"
66 #include "dict.h"
67 #include "dict_regexp.h"
68 #include "mac_parse.h"
69 #include "warn_stat.h"
70 #include "mvect.h"
71 
72  /*
73  * Support for IF/ENDIF based on an idea by Bert Driehuis.
74  */
75 #define DICT_REGEXP_OP_MATCH 1 /* Match this regexp */
76 #define DICT_REGEXP_OP_IF 2 /* Increase if/endif nesting on match */
77 #define DICT_REGEXP_OP_ENDIF 3 /* Decrease if/endif nesting on match */
78 
79  /*
80  * Regular expression before compiling.
81  */
/*
 * One pattern as it appears in the table, i.e. the pieces that are needed
 * to call regcomp() later.
 */
typedef struct {
    /* Text of the regular expression. */
    char   *regexp;
    /* Flag bits that are passed to regcomp(). */
    int     options;
    /* Non-zero: positive match; zero: negated match. */
    int     match;
} DICT_REGEXP_PATTERN;
87 
88  /*
89  * Compiled generic rule, and subclasses that derive from it.
90  */
/*
 * Generic rule header. The more specific rule types start with this
 * structure, so that a rule pointer can be cast after inspecting "op".
 */
typedef struct DICT_REGEXP_RULE {
    /* One of DICT_REGEXP_OP_MATCH, DICT_REGEXP_OP_IF, DICT_REGEXP_OP_ENDIF. */
    int     op;
    /* Line number in the table file, used in diagnostics. */
    int     lineno;
    /* Next rule in the dictionary, or null at the end of the list. */
    struct DICT_REGEXP_RULE *next;
} DICT_REGEXP_RULE;
96 
97 typedef struct {
98  DICT_REGEXP_RULE rule; /* generic part */
99  regex_t *first_exp; /* compiled primary pattern */
100  int first_match; /* positive or negative match */
101  regex_t *second_exp; /* compiled secondary pattern */
102  int second_match; /* positive or negative match */
103  char *replacement; /* replacement text */
104  size_t max_sub; /* largest $number in replacement */
105 } DICT_REGEXP_MATCH_RULE;
106 
107 typedef struct {
108  DICT_REGEXP_RULE rule; /* generic members */
109  regex_t *expr; /* the condition */
110  int match; /* positive or negative match */
111  struct DICT_REGEXP_RULE *endif_rule;/* matching endif rule */
112 } DICT_REGEXP_IF_RULE;
113 
114  /*
115  * Regexp map.
116  */
117 typedef struct {
118  DICT dict; /* generic members */
119  regmatch_t *pmatch; /* matched substring info */
120  DICT_REGEXP_RULE *head; /* first rule */
121  VSTRING *expansion_buf; /* lookup result */
122 } DICT_REGEXP;
123 
124  /*
125  * Macros to make dense code more readable.
126  */
127 #define NULL_SUBSTITUTIONS (0)
128 #define NULL_MATCH_RESULT ((regmatch_t *) 0)
129 
130  /*
131  * Context for $number expansion callback.
132  */
133 typedef struct {
134  DICT_REGEXP *dict_regexp; /* the dictionary handle */
135  DICT_REGEXP_MATCH_RULE *match_rule; /* the rule we matched */
136  const char *lookup_string; /* matched text */
137 } DICT_REGEXP_EXPAND_CONTEXT;
138 
139  /*
140  * Context for $number pre-scan callback.
141  */
/*
 * State that is handed to the replacement text pre-scan callback at
 * table load time.
 */
typedef struct {
    /* Name of the regexp map, for diagnostics. */
    const char *mapname;
    /* Line number in the map, for diagnostics. */
    int     lineno;
    /* Largest $number found so far; zero when none was found. */
    size_t  max_sub;
    /* Copy of the constant result text ($$ already reduced to $), or null. */
    char   *literal;
} DICT_REGEXP_PRESCAN_CONTEXT;
148 
149  /*
150  * Compatibility.
151  */
152 #ifndef MAC_PARSE_OK
153 #define MAC_PARSE_OK 0
154 #endif
155 
156 /* dict_regexp_expand - replace $number with substring from matched text */
157 
158 static int dict_regexp_expand(int type, VSTRING *buf, void *ptr)
159 {
160  DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr;
161  DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule;
162  DICT_REGEXP *dict_regexp = ctxt->dict_regexp;
163  regmatch_t *pmatch;
164  size_t n;
165 
166  /*
167  * Replace $number by the corresponding substring from the matched text.
168  * We pre-scanned the replacement text at compile time, so any out of
169  * range $number means that something impossible has happened.
170  */
171  if (type == MAC_PARSE_VARNAME) {
172  n = atoi(vstring_str(buf));
173  if (n < 1 || n > match_rule->max_sub)
174  msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"",
175  dict_regexp->dict.name, match_rule->rule.lineno,
176  vstring_str(buf));
177  pmatch = dict_regexp->pmatch + n;
178  if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo)
179  return (MAC_PARSE_UNDEF); /* empty or not matched */
180  vstring_strncat(dict_regexp->expansion_buf,
181  ctxt->lookup_string + pmatch->rm_so,
182  pmatch->rm_eo - pmatch->rm_so);
183  return (MAC_PARSE_OK);
184  }
185 
186  /*
187  * Straight text - duplicate with no substitution.
188  */
189  else {
190  vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf));
191  return (MAC_PARSE_OK);
192  }
193 }
194 
195 /* dict_regexp_regerror - report regexp compile/execute error */
196 
/*
 * dict_regexp_regerror - log a warning for a regcomp()/regexec() error.
 *
 * mapname and lineno identify the rule; error is the regcomp()/regexec()
 * result code; expr is the expression that the error code belongs to.
 */
static void dict_regexp_regerror(const char *mapname, int lineno, int error,
                                 const regex_t *expr)
{
    char    text[256];

    /* regerror() truncates the message to fit the buffer. */
    (void) regerror(error, expr, text, sizeof text);
    msg_warn("regexp map %s, line %d: %s", mapname, lineno, text);
}
205 
206  /*
207  * Inlined to reduce function call overhead in the time-critical loop.
208  */
/*
 * DICT_REGEXP_REGEXEC - run regexec() and reduce the result to true/false.
 *
 * The regexec() result is stored in (err). The macro value is (match) when
 * the expression matched, !(match) when it did not match, and 0 after any
 * other regexec() result, which is also logged with dict_regexp_regerror().
 * Note that (err) is evaluated more than once.
 */
#define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \
    (((err) = regexec((expr), (str), (nsub), (pmatch), 0)) == 0 ? \
        (match) : \
     (err) == REG_NOMATCH ? \
        !(match) : \
        (dict_regexp_regerror((map), (line), (err), (expr)), 0))
214 
215 /* dict_regexp_lookup - match string and perform optional substitution */
216 
217 static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string)
218 {
219  DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
220  DICT_REGEXP_RULE *rule;
221  DICT_REGEXP_IF_RULE *if_rule;
222  DICT_REGEXP_MATCH_RULE *match_rule;
223  DICT_REGEXP_EXPAND_CONTEXT expand_context;
224  int error;
225 
226  dict->error = 0;
227 
228  if (msg_verbose)
229  msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string);
230 
231  /*
232  * Optionally fold the key.
233  */
234  if (dict->flags & DICT_FLAG_FOLD_MUL) {
235  if (dict->fold_buf == 0)
236  dict->fold_buf = vstring_alloc(10);
237  vstring_strcpy(dict->fold_buf, lookup_string);
238  lookup_string = lowercase(vstring_str(dict->fold_buf));
239  }
240  for (rule = dict_regexp->head; rule; rule = rule->next) {
241 
242  switch (rule->op) {
243 
244  /*
245  * Search for the first matching primary expression. Limit the
246  * overhead for substring substitution to the bare minimum.
247  */
248  case DICT_REGEXP_OP_MATCH:
249  match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
250  if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
251  match_rule->first_exp,
252  match_rule->first_match,
253  lookup_string,
254  match_rule->max_sub > 0 ?
255  match_rule->max_sub + 1 : 0,
256  dict_regexp->pmatch))
257  continue;
258  if (match_rule->second_exp
259  && !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
260  match_rule->second_exp,
261  match_rule->second_match,
262  lookup_string,
263  NULL_SUBSTITUTIONS,
264  NULL_MATCH_RESULT))
265  continue;
266 
267  /*
268  * Skip $number substitutions when the replacement text contains
269  * no $number strings, as learned during the compile time
270  * pre-scan. The pre-scan already replaced $$ by $.
271  */
272  if (match_rule->max_sub == 0)
273  return (match_rule->replacement);
274 
275  /*
276  * Perform $number substitutions on the replacement text. We
277  * pre-scanned the replacement text at compile time. Any macro
278  * expansion errors at this point mean something impossible has
279  * happened.
280  */
281  if (!dict_regexp->expansion_buf)
282  dict_regexp->expansion_buf = vstring_alloc(10);
283  VSTRING_RESET(dict_regexp->expansion_buf);
284  expand_context.lookup_string = lookup_string;
285  expand_context.match_rule = match_rule;
286  expand_context.dict_regexp = dict_regexp;
287 
288  if (mac_parse(match_rule->replacement, dict_regexp_expand,
289  (void *) &expand_context) & MAC_PARSE_ERROR)
290  msg_panic("regexp map %s, line %d: bad replacement syntax",
291  dict->name, rule->lineno);
292  VSTRING_TERMINATE(dict_regexp->expansion_buf);
293  return (vstring_str(dict_regexp->expansion_buf));
294 
295  /*
296  * Conditional.
297  */
298  case DICT_REGEXP_OP_IF:
299  if_rule = (DICT_REGEXP_IF_RULE *) rule;
300  if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
301  if_rule->expr, if_rule->match, lookup_string,
302  NULL_SUBSTITUTIONS, NULL_MATCH_RESULT))
303  continue;
304  /* An IF without matching ENDIF has no "endif" rule. */
305  if ((rule = if_rule->endif_rule) == 0)
306  return (0);
307  /* FALLTHROUGH */
308 
309  /*
310  * ENDIF after IF.
311  */
312  case DICT_REGEXP_OP_ENDIF:
313  continue;
314 
315  default:
316  msg_panic("dict_regexp_lookup: impossible operation %d", rule->op);
317  }
318  }
319  return (0);
320 }
321 
322 /* dict_regexp_close - close regexp dictionary */
323 
324 static void dict_regexp_close(DICT *dict)
325 {
326  DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
327  DICT_REGEXP_RULE *rule;
328  DICT_REGEXP_RULE *next;
329  DICT_REGEXP_MATCH_RULE *match_rule;
330  DICT_REGEXP_IF_RULE *if_rule;
331 
332  for (rule = dict_regexp->head; rule; rule = next) {
333  next = rule->next;
334  switch (rule->op) {
335  case DICT_REGEXP_OP_MATCH:
336  match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
337  if (match_rule->first_exp) {
338  regfree(match_rule->first_exp);
339  myfree((void *) match_rule->first_exp);
340  }
341  if (match_rule->second_exp) {
342  regfree(match_rule->second_exp);
343  myfree((void *) match_rule->second_exp);
344  }
345  if (match_rule->replacement)
346  myfree((void *) match_rule->replacement);
347  break;
348  case DICT_REGEXP_OP_IF:
349  if_rule = (DICT_REGEXP_IF_RULE *) rule;
350  if (if_rule->expr) {
351  regfree(if_rule->expr);
352  myfree((void *) if_rule->expr);
353  }
354  break;
355  case DICT_REGEXP_OP_ENDIF:
356  break;
357  default:
358  msg_panic("dict_regexp_close: unknown operation %d", rule->op);
359  }
360  myfree((void *) rule);
361  }
362  if (dict_regexp->pmatch)
363  myfree((void *) dict_regexp->pmatch);
364  if (dict_regexp->expansion_buf)
365  vstring_free(dict_regexp->expansion_buf);
366  if (dict->fold_buf)
367  vstring_free(dict->fold_buf);
368  dict_free(dict);
369 }
370 
371 /* dict_regexp_get_pat - extract one pattern with options from rule */
372 
373 static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp,
374  DICT_REGEXP_PATTERN *pat)
375 {
376  char *p = *bufp;
377  char re_delim;
378 
379  /*
380  * Process negation operators.
381  */
382  pat->match = 1;
383  for (;;) {
384  if (*p == '!')
385  pat->match = !pat->match;
386  else if (!ISSPACE(*p))
387  break;
388  p++;
389  }
390  if (*p == 0) {
391  msg_warn("regexp map %s, line %d: no regexp: skipping this rule",
392  mapname, lineno);
393  return (0);
394  }
395 
396  /*
397  * Search for the closing delimiter, handling backslash escape.
398  */
399  re_delim = *p++;
400  pat->regexp = p;
401  while (*p) {
402  if (*p == '\\') {
403  if (p[1])
404  p++;
405  else
406  break;
407  } else if (*p == re_delim) {
408  break;
409  }
410  ++p;
411  }
412  if (!*p) {
413  msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": "
414  "skipping this rule", mapname, lineno, re_delim);
415  return (0);
416  }
417  *p++ = 0; /* null terminate */
418 
419  /*
420  * Search for options.
421  */
422  pat->options = REG_EXTENDED | REG_ICASE;
423  while (*p && !ISSPACE(*p) && *p != '!') {
424  switch (*p) {
425  case 'i':
426  pat->options ^= REG_ICASE;
427  break;
428  case 'm':
429  pat->options ^= REG_NEWLINE;
430  break;
431  case 'x':
432  pat->options ^= REG_EXTENDED;
433  break;
434  default:
435  msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": "
436  "skipping this rule", mapname, lineno, *p);
437  return (0);
438  }
439  ++p;
440  }
441  *bufp = p;
442  return (1);
443 }
444 
445 /* dict_regexp_get_pats - get the primary and second patterns and flags */
446 
447 static int dict_regexp_get_pats(const char *mapname, int lineno, char **p,
448  DICT_REGEXP_PATTERN *first_pat,
449  DICT_REGEXP_PATTERN *second_pat)
450 {
451 
452  /*
453  * Get the primary and optional secondary patterns and their flags.
454  */
455  if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0)
456  return (0);
457  if (**p == '!') {
458 #if 0
459  static int bitrot_warned = 0;
460 
461  if (bitrot_warned == 0) {
462  msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away,"
463  " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead",
464  mapname, lineno);
465  bitrot_warned = 1;
466  }
467 #endif
468  if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0)
469  return (0);
470  } else {
471  second_pat->regexp = 0;
472  }
473  return (1);
474 }
475 
476 /* dict_regexp_prescan - find largest $number in replacement text */
477 
478 static int dict_regexp_prescan(int type, VSTRING *buf, void *context)
479 {
480  DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context;
481  size_t n;
482 
483  /*
484  * Keep a copy of literal text (with $$ already replaced by $) if and
485  * only if the replacement text contains no $number expression. This way
486  * we can avoid having to scan the replacement text at lookup time.
487  */
488  if (type == MAC_PARSE_VARNAME) {
489  if (ctxt->literal) {
490  myfree(ctxt->literal);
491  ctxt->literal = 0;
492  }
493  if (!alldig(vstring_str(buf))) {
494  msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"",
495  ctxt->mapname, ctxt->lineno, vstring_str(buf));
496  return (MAC_PARSE_ERROR);
497  }
498  n = atoi(vstring_str(buf));
499  if (n < 1) {
500  msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"",
501  ctxt->mapname, ctxt->lineno, vstring_str(buf));
502  return (MAC_PARSE_ERROR);
503  }
504  if (n > ctxt->max_sub)
505  ctxt->max_sub = n;
506  } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
507  if (ctxt->literal)
508  msg_panic("regexp map %s, line %d: multiple literals but no $number",
509  ctxt->mapname, ctxt->lineno);
510  ctxt->literal = mystrdup(vstring_str(buf));
511  }
512  return (MAC_PARSE_OK);
513 }
514 
515 /* dict_regexp_compile_pat - compile one pattern */
516 
517 static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno,
518  DICT_REGEXP_PATTERN *pat)
519 {
520  int error;
521  regex_t *expr;
522 
523  expr = (regex_t *) mymalloc(sizeof(*expr));
524  error = regcomp(expr, pat->regexp, pat->options);
525  if (error != 0) {
526  dict_regexp_regerror(mapname, lineno, error, expr);
527  myfree((void *) expr);
528  return (0);
529  }
530  return (expr);
531 }
532 
533 /* dict_regexp_rule_alloc - fill in a generic rule structure */
534 
535 static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int lineno, size_t size)
536 {
537  DICT_REGEXP_RULE *rule;
538 
539  rule = (DICT_REGEXP_RULE *) mymalloc(size);
540  rule->op = op;
541  rule->lineno = lineno;
542  rule->next = 0;
543 
544  return (rule);
545 }
546 
547 /* dict_regexp_parseline - parse one rule */
548 
549 static DICT_REGEXP_RULE *dict_regexp_parseline(const char *mapname, int lineno,
550  char *line, int nesting,
551  int dict_flags)
552 {
553  char *p;
554 
555  p = line;
556 
557  /*
558  * An ordinary rule takes one or two patterns and replacement text.
559  */
560  if (!ISALNUM(*p)) {
561  DICT_REGEXP_PATTERN first_pat;
562  DICT_REGEXP_PATTERN second_pat;
563  DICT_REGEXP_PRESCAN_CONTEXT prescan_context;
564  regex_t *first_exp = 0;
565  regex_t *second_exp;
566  DICT_REGEXP_MATCH_RULE *match_rule;
567 
568  /*
569  * Get the primary and the optional secondary patterns.
570  */
571  if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat))
572  return (0);
573 
574  /*
575  * Get the replacement text.
576  */
577  while (*p && ISSPACE(*p))
578  ++p;
579  if (!*p) {
580  msg_warn("regexp map %s, line %d: no replacement text: "
581  "using empty string", mapname, lineno);
582  }
583 
584  /*
585  * Find the highest-numbered $number in the replacement text. We can
586  * speed up pattern matching 1) by passing hints to the regexp
587  * compiler, setting the REG_NOSUB flag when the replacement text
588  * contains no $number string; 2) by passing hints to the regexp
589  * execution code, limiting the amount of text that is made available
590  * for substitution.
591  */
592  prescan_context.mapname = mapname;
593  prescan_context.lineno = lineno;
594  prescan_context.max_sub = 0;
595  prescan_context.literal = 0;
596 
597  /*
598  * The optimizer will eliminate code duplication and/or dead code.
599  */
600 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
601  if (first_exp) { \
602  regfree(first_exp); \
603  myfree((void *) first_exp); \
604  } \
605  if (prescan_context.literal) \
606  myfree(prescan_context.literal); \
607  return (rval); \
608  } while (0)
609 
610  if (mac_parse(p, dict_regexp_prescan, (void *) &prescan_context)
611  & MAC_PARSE_ERROR) {
612  msg_warn("regexp map %s, line %d: bad replacement syntax: "
613  "skipping this rule", mapname, lineno);
614  CREATE_MATCHOP_ERROR_RETURN(0);
615  }
616 
617  /*
618  * Compile the primary and the optional secondary pattern. Speed up
619  * execution when no matched text needs to be substituted into the
620  * result string, or when the highest numbered substring is less than
621  * the total number of () subpatterns.
622  */
623  if (prescan_context.max_sub == 0)
624  first_pat.options |= REG_NOSUB;
625  if (prescan_context.max_sub > 0 && first_pat.match == 0) {
626  msg_warn("regexp map %s, line %d: $number found in negative match "
627  "replacement text: skipping this rule", mapname, lineno);
628  CREATE_MATCHOP_ERROR_RETURN(0);
629  }
630  if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
631  msg_warn("regexp map %s, line %d: "
632  "regular expression substitution is not allowed: "
633  "skipping this rule", mapname, lineno);
634  CREATE_MATCHOP_ERROR_RETURN(0);
635  }
636  if ((first_exp = dict_regexp_compile_pat(mapname, lineno,
637  &first_pat)) == 0)
638  CREATE_MATCHOP_ERROR_RETURN(0);
639  if (prescan_context.max_sub > first_exp->re_nsub) {
640  msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
641  "skipping this rule", mapname, lineno,
642  (int) prescan_context.max_sub);
643  CREATE_MATCHOP_ERROR_RETURN(0);
644  }
645  if (second_pat.regexp != 0) {
646  second_pat.options |= REG_NOSUB;
647  if ((second_exp = dict_regexp_compile_pat(mapname, lineno,
648  &second_pat)) == 0)
649  CREATE_MATCHOP_ERROR_RETURN(0);
650  } else {
651  second_exp = 0;
652  }
653  match_rule = (DICT_REGEXP_MATCH_RULE *)
654  dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, lineno,
655  sizeof(DICT_REGEXP_MATCH_RULE));
656  match_rule->first_exp = first_exp;
657  match_rule->first_match = first_pat.match;
658  match_rule->max_sub = prescan_context.max_sub;
659  match_rule->second_exp = second_exp;
660  match_rule->second_match = second_pat.match;
661  if (prescan_context.literal)
662  match_rule->replacement = prescan_context.literal;
663  else
664  match_rule->replacement = mystrdup(p);
665  return ((DICT_REGEXP_RULE *) match_rule);
666  }
667 
668  /*
669  * The IF operator takes one pattern but no replacement text.
670  */
671  else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
672  DICT_REGEXP_PATTERN pattern;
673  regex_t *expr;
674  DICT_REGEXP_IF_RULE *if_rule;
675 
676  p += 2;
677  while (*p && ISSPACE(*p))
678  p++;
679  if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern))
680  return (0);
681  while (*p && ISSPACE(*p))
682  ++p;
683  if (*p) {
684  msg_warn("regexp map %s, line %d: ignoring extra text after"
685  " IF statement: \"%s\"", mapname, lineno, p);
686  msg_warn("regexp map %s, line %d: do not prepend whitespace"
687  " to statements between IF and ENDIF", mapname, lineno);
688  }
689  if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0)
690  return (0);
691  if_rule = (DICT_REGEXP_IF_RULE *)
692  dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, lineno,
693  sizeof(DICT_REGEXP_IF_RULE));
694  if_rule->expr = expr;
695  if_rule->match = pattern.match;
696  if_rule->endif_rule = 0;
697  return ((DICT_REGEXP_RULE *) if_rule);
698  }
699 
700  /*
701  * The ENDIF operator takes no patterns and no replacement text.
702  */
703  else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
704  DICT_REGEXP_RULE *rule;
705 
706  p += 5;
707  if (nesting == 0) {
708  msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF",
709  mapname, lineno);
710  return (0);
711  }
712  while (*p && ISSPACE(*p))
713  ++p;
714  if (*p)
715  msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF",
716  mapname, lineno);
717  rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, lineno,
718  sizeof(DICT_REGEXP_RULE));
719  return (rule);
720  }
721 
722  /*
723  * Unrecognized input.
724  */
725  else {
726  msg_warn("regexp map %s, line %d: ignoring unrecognized request",
727  mapname, lineno);
728  return (0);
729  }
730 }
731 
732 /* dict_regexp_open - load and compile a file containing regular expressions */
733 
734 DICT *dict_regexp_open(const char *mapname, int open_flags, int dict_flags)
735 {
736  const char myname[] = "dict_regexp_open";
737  DICT_REGEXP *dict_regexp;
738  VSTREAM *map_fp = 0;
739  struct stat st;
740  VSTRING *line_buffer = 0;
741  DICT_REGEXP_RULE *rule;
742  DICT_REGEXP_RULE *last_rule = 0;
743  int lineno;
744  int last_line = 0;
745  size_t max_sub = 0;
746  int nesting = 0;
747  char *p;
748  DICT_REGEXP_RULE **rule_stack = 0;
749  MVECT mvect;
750 
751  /*
752  * Let the optimizer worry about eliminating redundant code.
753  */
754 #define DICT_REGEXP_OPEN_RETURN(d) do { \
755  DICT *__d = (d); \
756  if (line_buffer != 0) \
757  vstring_free(line_buffer); \
758  if (map_fp != 0) \
759  vstream_fclose(map_fp); \
760  return (__d); \
761  } while (0)
762 
763  /*
764  * Sanity checks.
765  */
766  if (open_flags != O_RDONLY)
767  DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP,
768  mapname, open_flags, dict_flags,
769  "%s:%s map requires O_RDONLY access mode",
770  DICT_TYPE_REGEXP, mapname));
771 
772  /*
773  * Open the configuration file.
774  */
775  if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
776  DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, mapname,
777  open_flags, dict_flags,
778  "open %s: %m", mapname));
779  if (fstat(vstream_fileno(map_fp), &st) < 0)
780  msg_fatal("fstat %s: %m", mapname);
781 
782  line_buffer = vstring_alloc(100);
783 
784  dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname,
785  sizeof(*dict_regexp));
786  dict_regexp->dict.lookup = dict_regexp_lookup;
787  dict_regexp->dict.close = dict_regexp_close;
788  dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN;
789  if (dict_flags & DICT_FLAG_FOLD_MUL)
790  dict_regexp->dict.fold_buf = vstring_alloc(10);
791  dict_regexp->head = 0;
792  dict_regexp->pmatch = 0;
793  dict_regexp->expansion_buf = 0;
794  dict_regexp->dict.owner.uid = st.st_uid;
795  dict_regexp->dict.owner.status = (st.st_uid != 0);
796 
797  /*
798  * Parse the regexp table.
799  */
800  while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
801  p = vstring_str(line_buffer);
802  trimblanks(p, 0)[0] = 0;
803  if (*p == 0)
804  continue;
805  rule = dict_regexp_parseline(mapname, lineno, p, nesting, dict_flags);
806  if (rule == 0)
807  continue;
808  if (rule->op == DICT_REGEXP_OP_MATCH) {
809  if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub)
810  max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub;
811  } else if (rule->op == DICT_REGEXP_OP_IF) {
812  if (rule_stack == 0)
813  rule_stack = (DICT_REGEXP_RULE **) mvect_alloc(&mvect,
814  sizeof(*rule_stack), nesting + 1,
815  (MVECT_FN) 0, (MVECT_FN) 0);
816  else
817  rule_stack =
818  (DICT_REGEXP_RULE **) mvect_realloc(&mvect, nesting + 1);
819  rule_stack[nesting] = rule;
820  nesting++;
821  } else if (rule->op == DICT_REGEXP_OP_ENDIF) {
822  DICT_REGEXP_IF_RULE *if_rule;
823 
824  if (nesting-- <= 0)
825  /* Already handled in dict_regexp_parseline(). */
826  msg_panic("%s: ENDIF without IF", myname);
827  if (rule_stack[nesting]->op != DICT_REGEXP_OP_IF)
828  msg_panic("%s: unexpected rule stack element type %d",
829  myname, rule_stack[nesting]->op);
830  if_rule = (DICT_REGEXP_IF_RULE *) rule_stack[nesting];
831  if_rule->endif_rule = rule;
832  }
833  if (last_rule == 0)
834  dict_regexp->head = rule;
835  else
836  last_rule->next = rule;
837  last_rule = rule;
838  }
839 
840  while (nesting-- > 0)
841  msg_warn("regexp map %s, line %d: IF has no matching ENDIF",
842  mapname, rule_stack[nesting]->lineno);
843 
844  if (rule_stack)
845  (void) mvect_free(&mvect);
846 
847  /*
848  * Allocate space for only as many matched substrings as used in the
849  * replacement text.
850  */
851  if (max_sub > 0)
852  dict_regexp->pmatch =
853  (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1));
854 
855  DICT_REGEXP_OPEN_RETURN(DICT_DEBUG (&dict_regexp->dict));
856 }
857 
858 #endif
int msg_verbose
Definition: msg.c:177
void myfree(void *ptr)
Definition: mymalloc.c:207
char * mystrdup(const char *str)
Definition: mymalloc.c:225
Definition: mvect.h:19
NORETURN msg_panic(const char *fmt,...)
Definition: msg.c:295
#define vstring_str(vp)
Definition: vstring.h:71
char * name
Definition: dict.h:80
#define stat(p, s)
Definition: warn_stat.h:18
int flags
Definition: dict.h:81
VSTRING * vstring_strncat(VSTRING *vp, const char *src, ssize_t len)
Definition: vstring.c:471
int alldig(const char *string)
Definition: alldig.c:38
int strncasecmp(const char *s1, const char *s2, size_t n)
Definition: strcasecmp.c:52
VSTRING * vstring_strcpy(VSTRING *vp, const char *src)
Definition: vstring.c:431
VSTREAM * vstream_fopen(const char *path, int flags, mode_t mode)
Definition: vstream.c:1241
#define VSTRING_TERMINATE(vp)
Definition: vstring.h:74
#define ISALNUM(c)
Definition: sys_defs.h:1745
Definition: dict.h:78
#define VSTRING_RESET(vp)
Definition: vstring.h:77
void msg_warn(const char *fmt,...)
Definition: msg.c:215
VSTRING * vstring_alloc(ssize_t len)
Definition: vstring.c:353
int error
Definition: dict.h:94
char * lowercase(char *string)
Definition: lowercase.c:34
const char *(* lookup)(struct DICT *, const char *)
Definition: dict.h:82
char * mvect_free(MVECT *vect)
Definition: mvect.c:111
DICT * dict_regexp_open(const char *, int, int)
char * trimblanks(char *, ssize_t)
Definition: trimblanks.c:37
NORETURN msg_fatal(const char *fmt,...)
Definition: msg.c:249
char * mvect_alloc(MVECT *vect, ssize_t elsize, ssize_t nelm, void(*init_fn)(char *, ssize_t), void(*wipe_fn)(char *, ssize_t))
Definition: mvect.c:75
VSTRING * readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
Definition: readlline.c:82
#define DICT_FLAG_PATTERN
Definition: dict.h:115
void(* MVECT_FN)(char *, ssize_t)
Definition: mvect.h:17
int mac_parse(const char *value, MAC_PARSE_FN action, void *context)
Definition: mac_parse.c:85
void dict_free(DICT *)
Definition: dict_alloc.c:163
char * mvect_realloc(MVECT *vect, ssize_t nelm)
Definition: mvect.c:91
#define DICT_TYPE_REGEXP
Definition: dict_regexp.h:22
VSTRING * vstring_free(VSTRING *vp)
Definition: vstring.c:380
#define DICT_FLAG_NO_REGSUB
Definition: dict.h:121
#define MAC_PARSE_OK
Definition: mac_parse.h:26
#define MAC_PARSE_UNDEF
Definition: mac_parse.h:28
#define DICT_FLAG_FOLD_MUL
Definition: dict.h:125
#define vstream_fileno(vp)
Definition: vstream.h:115
#define MAC_PARSE_VARNAME
Definition: mac_parse.h:24
#define ISSPACE(c)
Definition: sys_defs.h:1753
DICT * dict_alloc(const char *, const char *, ssize_t)
Definition: dict_alloc.c:135
VSTRING * fold_buf
Definition: dict.h:92
#define MAC_PARSE_LITERAL
Definition: mac_parse.h:22
VSTRING * vstring_strcat(VSTRING *vp, const char *src)
Definition: vstring.c:459
DICT * dict_surrogate(const char *dict_type, const char *dict_name, int open_flags, int dict_flags, const char *fmt,...)
#define fstat(f, s)
Definition: warn_stat.h:20
#define MAC_PARSE_ERROR
Definition: mac_parse.h:27
void * mymalloc(ssize_t len)
Definition: mymalloc.c:150
void msg_info(const char *fmt,...)
Definition: msg.c:199