Postfix3.3.1
全て データ構造 ファイル 関数 変数 型定義 マクロ定義
dict_pcre.c
[詳解]
1 /*++
2 /* NAME
3 /* dict_pcre 3
4 /* SUMMARY
5 /* dictionary manager interface to PCRE regular expression library
6 /* SYNOPSIS
7 /* #include <dict_pcre.h>
8 /*
9 /* DICT *dict_pcre_open(name, open_flags, dict_flags)
10 /* const char *name;
11 /* int open_flags;
12 /* int dict_flags;
13 /* DESCRIPTION
14 /* dict_pcre_open() opens the named file and compiles the contained
15 /* regular expressions. The result object can be used to match strings
16 /* against the table.
17 /* SEE ALSO
18 /* dict(3) generic dictionary manager
19 /* AUTHOR(S)
20 /* Andrew McNamara
21 /* andrewm@connect.com.au
22 /* connect.com.au Pty. Ltd.
23 /* Level 3, 213 Miller St
24 /* North Sydney, NSW, Australia
25 /*
26 /* Wietse Venema
27 /* IBM T.J. Watson Research
28 /* P.O. Box 704
29 /* Yorktown Heights, NY 10598, USA
30 /*
31 /* Wietse Venema
32 /* Google, Inc.
33 /* 111 8th Avenue
34 /* New York, NY 10011, USA
35 /*--*/
36 
37 #include "sys_defs.h"
38 
39 #ifdef HAS_PCRE
40 
41 /* System library. */
42 
43 #include <sys/stat.h>
44 #include <stdio.h> /* sprintf() prototype */
45 #include <stdlib.h>
46 #include <unistd.h>
47 #include <string.h>
48 #include <ctype.h>
49 
50 #ifdef STRCASECMP_IN_STRINGS_H
51 #include <strings.h>
52 #endif
53 
54 /* Utility library. */
55 
56 #include "mymalloc.h"
57 #include "msg.h"
58 #include "safe.h"
59 #include "vstream.h"
60 #include "vstring.h"
61 #include "stringops.h"
62 #include "readlline.h"
63 #include "dict.h"
64 #include "dict_pcre.h"
65 #include "mac_parse.h"
66 #include "pcre.h"
67 #include "warn_stat.h"
68 #include "mvect.h"
69 
70  /*
71  * Backwards compatibility. When the PCRE headers define
 * PCRE_STUDY_JIT_COMPILE, study data is released with pcre_free_study();
 * otherwise it is released with pcre_free().
72  */
73 #ifdef PCRE_STUDY_JIT_COMPILE
74 #define DICT_PCRE_FREE_STUDY(x) pcre_free_study(x)
75 #else
76 #define DICT_PCRE_FREE_STUDY(x) pcre_free((char *) (x))
77 #endif
78 
79  /*
80  * Support for IF/ENDIF based on an idea by Bert Driehuis. These values are
 * stored in the "op" member of DICT_PCRE_RULE.
81  */
82 #define DICT_PCRE_OP_MATCH 1 /* Match this regexp, return the replacement */
83 #define DICT_PCRE_OP_IF 2 /* Skip to the paired ENDIF unless the condition holds */
84 #define DICT_PCRE_OP_ENDIF 3 /* End of an IF block; no action at lookup time */
85 
86  /*
87  * Max strings captured by regexp - essentially the max number of (..).
 * The pcre_exec() offset vector is sized as PCRE_MAX_CAPTURE * 3 ints.
88  */
89 #define PCRE_MAX_CAPTURE 99
90 
91  /*
92  * Regular expression before and after compilation. DICT_PCRE_REGEXP is the
 * parsed-but-uncompiled form filled in by dict_pcre_get_pattern();
 * DICT_PCRE_ENGINE is the compiled form filled in by dict_pcre_compile().
93  */
94 typedef struct {
95  char *regexp; /* regular expression text (points into the input line) */
96  int options; /* PCRE compile option bits */
97  int match; /* 1 = positive match, 0 = negated with "!" */
98 } DICT_PCRE_REGEXP;
99 
100 typedef struct {
101  pcre *pattern; /* the compiled pattern */
102  pcre_extra *hints; /* hints to speed pattern execution */
103 } DICT_PCRE_ENGINE;
104 
105  /*
106  * Compiled generic rule, and subclasses that derive from it. Each subclass
 * embeds DICT_PCRE_RULE as its first member, and the code casts a generic
 * rule pointer to the subclass selected by the "op" member.
107  */
108 typedef struct DICT_PCRE_RULE {
109  int op; /* DICT_PCRE_OP_MATCH/IF/ENDIF */
110  int lineno; /* source file line number */
111  struct DICT_PCRE_RULE *next; /* next rule in dict */
112 } DICT_PCRE_RULE;
113 
114 typedef struct {
115  DICT_PCRE_RULE rule; /* generic part */
116  pcre *pattern; /* compiled pattern */
117  pcre_extra *hints; /* hints to speed pattern execution */
118  char *replacement; /* replacement string */
119  int match; /* positive or negative match */
120  size_t max_sub; /* largest $number in replacement; 0 = return replacement as is */
121 } DICT_PCRE_MATCH_RULE;
122 
123 typedef struct {
124  DICT_PCRE_RULE rule; /* generic members */
125  pcre *pattern; /* compiled pattern */
126  pcre_extra *hints; /* hints to speed pattern execution */
127  int match; /* positive or negative match */
128  struct DICT_PCRE_RULE *endif_rule; /* matching endif rule, 0 if the IF is never closed */
129 } DICT_PCRE_IF_RULE;
130 
131  /*
132  * PCRE map: the generic DICT handle followed by the rule list and a
 * scratch buffer for expanded replacement text.
133  */
134 typedef struct {
135  DICT dict; /* generic members */
136  DICT_PCRE_RULE *head; /* first rule, in file order */
137  VSTRING *expansion_buf; /* lookup result, allocated on first use */
138 } DICT_PCRE;
139 
140 static int dict_pcre_init = 0; /* non-zero once pcre_malloc/pcre_free are redirected */
141 
142 /*
143  * Context for $number expansion callback.
144  */
145 typedef struct {
146  DICT_PCRE *dict_pcre; /* the dictionary handle */
147  DICT_PCRE_MATCH_RULE *match_rule; /* the rule we matched */
148  const char *lookup_string; /* string against which we match */
149  int offsets[PCRE_MAX_CAPTURE * 3]; /* pcre_exec() offset vector */
150  int matches; /* pcre_exec() result (count or error code) */
151 } DICT_PCRE_EXPAND_CONTEXT;
152 
153  /*
154  * Context for $number pre-scan callback.
155  */
156 typedef struct {
157  const char *mapname; /* name of regexp map */
158  int lineno; /* where in file */
159  size_t max_sub; /* Largest $n seen */
160  char *literal; /* constant result, $$ -> $; 0 when $number is present */
161 } DICT_PCRE_PRESCAN_CONTEXT;
162 
163  /*
164  * Compatibility: provide MAC_PARSE_OK when mac_parse.h does not define it.
165  */
166 #ifndef MAC_PARSE_OK
167 #define MAC_PARSE_OK 0
168 #endif
169 
170  /*
171  * Macros to make dense code more accessible. They name the "nothing
 * special" argument values for pcre_exec().
172  */
173 #define NULL_STARTOFFSET (0)
174 #define NULL_EXEC_OPTIONS (0)
175 #define NULL_OVECTOR ((int *) 0)
176 #define NULL_OVECTOR_LENGTH (0)
177 
178 /* dict_pcre_expand - replace $number with matched text */
179 
180 static int dict_pcre_expand(int type, VSTRING *buf, void *ptr)
181 {
182  DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
183  DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule;
184  DICT_PCRE *dict_pcre = ctxt->dict_pcre;
185  const char *pp;
186  int n;
187  int ret;
188 
189  /*
190  * Replace $0-${99} with strings cut from matched text.
191  */
192  if (type == MAC_PARSE_VARNAME) {
193  n = atoi(vstring_str(buf));
194  ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
195  ctxt->matches, n, &pp);
196  if (ret < 0) {
197  if (ret == PCRE_ERROR_NOSUBSTRING)
198  return (MAC_PARSE_UNDEF);
199  else
200  msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
201  dict_pcre->dict.name, match_rule->rule.lineno, ret);
202  }
203  if (*pp == 0) {
204  myfree((void *) pp);
205  return (MAC_PARSE_UNDEF);
206  }
207  vstring_strcat(dict_pcre->expansion_buf, pp);
208  myfree((void *) pp);
209  return (MAC_PARSE_OK);
210  }
211 
212  /*
213  * Straight text - duplicate with no substitution.
214  */
215  else {
216  vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf));
217  return (MAC_PARSE_OK);
218  }
219 }
220 
221 /* dict_pcre_exec_error - report matching error */
222 
223 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
224 {
225  switch (errval) {
226  case 0:
227  msg_warn("pcre map %s, line %d: too many (...)",
228  mapname, lineno);
229  return;
230  case PCRE_ERROR_NULL:
231  case PCRE_ERROR_BADOPTION:
232  msg_warn("pcre map %s, line %d: bad args to re_exec",
233  mapname, lineno);
234  return;
235  case PCRE_ERROR_BADMAGIC:
236  case PCRE_ERROR_UNKNOWN_NODE:
237  msg_warn("pcre map %s, line %d: corrupt compiled regexp",
238  mapname, lineno);
239  return;
240 #ifdef PCRE_ERROR_NOMEMORY
241  case PCRE_ERROR_NOMEMORY:
242  msg_warn("pcre map %s, line %d: out of memory",
243  mapname, lineno);
244  return;
245 #endif
246 #ifdef PCRE_ERROR_MATCHLIMIT
247  case PCRE_ERROR_MATCHLIMIT:
248  msg_warn("pcre map %s, line %d: backtracking limit exceeded",
249  mapname, lineno);
250  return;
251 #endif
252 #ifdef PCRE_ERROR_BADUTF8
253  case PCRE_ERROR_BADUTF8:
254  msg_warn("pcre map %s, line %d: bad UTF-8 sequence in search string",
255  mapname, lineno);
256  return;
257 #endif
258 #ifdef PCRE_ERROR_BADUTF8_OFFSET
259  case PCRE_ERROR_BADUTF8_OFFSET:
260  msg_warn("pcre map %s, line %d: bad UTF-8 start offset in search string",
261  mapname, lineno);
262  return;
263 #endif
264  default:
265  msg_warn("pcre map %s, line %d: unknown pcre_exec error: %d",
266  mapname, lineno, errval);
267  return;
268  }
269 }
270 
271  /*
272  * Inlined to reduce function call overhead in the time-critical loop.
 *
 * Runs pcre_exec() on (str, len) and stores its result in (ctxt).matches,
 * with substring offsets in (ctxt).offsets. The expression evaluates to
 * (match) when pcre_exec() reports a match, to !(match) when it reports
 * PCRE_ERROR_NOMATCH, and to 0 after logging any other result (including
 * 0) through dict_pcre_exec_error(). The ctxt and match arguments appear
 * more than once in the expansion, so pass expressions without side
 * effects.
273  */
274 #define DICT_PCRE_EXEC(ctxt, map, line, pattern, hints, match, str, len) \
275  ((ctxt).matches = pcre_exec((pattern), (hints), (str), (len), \
276  NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \
277  (ctxt).offsets, PCRE_MAX_CAPTURE * 3), \
278  (ctxt).matches > 0 ? (match) : \
279  (ctxt).matches == PCRE_ERROR_NOMATCH ? !(match) : \
280  (dict_pcre_exec_error((map), (line), (ctxt).matches), 0))
281 
282 /* dict_pcre_lookup - match string and perform optional substitution */
283 
284 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
285 {
286  DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
287  DICT_PCRE_RULE *rule;
288  DICT_PCRE_IF_RULE *if_rule;
289  DICT_PCRE_MATCH_RULE *match_rule;
290  int lookup_len = strlen(lookup_string);
291  DICT_PCRE_EXPAND_CONTEXT ctxt;
292 
293  dict->error = 0;
294 
295  if (msg_verbose)
296  msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);
297 
298  /*
299  * Optionally fold the key.
300  */
301  if (dict->flags & DICT_FLAG_FOLD_MUL) {
302  if (dict->fold_buf == 0)
303  dict->fold_buf = vstring_alloc(10);
304  vstring_strcpy(dict->fold_buf, lookup_string);
305  lookup_string = lowercase(vstring_str(dict->fold_buf));
306  }
307  for (rule = dict_pcre->head; rule; rule = rule->next) {
308 
309  switch (rule->op) {
310 
311  /*
312  * Search for a matching expression.
313  */
314  case DICT_PCRE_OP_MATCH:
315  match_rule = (DICT_PCRE_MATCH_RULE *) rule;
316  if (!DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno,
317  match_rule->pattern, match_rule->hints,
318  match_rule->match, lookup_string, lookup_len))
319  continue;
320 
321  /*
322  * Skip $number substitutions when the replacement text contains
323  * no $number strings, as learned during the compile time
324  * pre-scan. The pre-scan already replaced $$ by $.
325  */
326  if (match_rule->max_sub == 0)
327  return match_rule->replacement;
328 
329  /*
330  * We've got a match. Perform substitution on replacement string.
331  */
332  if (dict_pcre->expansion_buf == 0)
333  dict_pcre->expansion_buf = vstring_alloc(10);
334  VSTRING_RESET(dict_pcre->expansion_buf);
335  ctxt.dict_pcre = dict_pcre;
336  ctxt.match_rule = match_rule;
337  ctxt.lookup_string = lookup_string;
338 
339  if (mac_parse(match_rule->replacement, dict_pcre_expand,
340  (void *) &ctxt) & MAC_PARSE_ERROR)
341  msg_fatal("pcre map %s, line %d: bad replacement syntax",
342  dict->name, rule->lineno);
343 
344  VSTRING_TERMINATE(dict_pcre->expansion_buf);
345  return (vstring_str(dict_pcre->expansion_buf));
346 
347  /*
348  * Conditional. XXX We provide space for matched substring info
349  * because PCRE uses part of it as workspace for backtracking.
350  * PCRE will allocate memory if it runs out of backtracking
351  * storage.
352  */
353  case DICT_PCRE_OP_IF:
354  if_rule = (DICT_PCRE_IF_RULE *) rule;
355  if (DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno,
356  if_rule->pattern, if_rule->hints,
357  if_rule->match, lookup_string, lookup_len))
358  continue;
359  /* An IF without matching ENDIF has no "endif" rule. */
360  if ((rule = if_rule->endif_rule) == 0)
361  return (0);
362  /* FALLTHROUGH */
363 
364  /*
365  * ENDIF after IF.
366  */
367  case DICT_PCRE_OP_ENDIF:
368  continue;
369 
370  default:
371  msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
372  }
373  }
374  return (0);
375 }
376 
377 /* dict_pcre_close - close pcre dictionary */
378 
379 static void dict_pcre_close(DICT *dict)
380 {
381  DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
382  DICT_PCRE_RULE *rule;
383  DICT_PCRE_RULE *next;
384  DICT_PCRE_MATCH_RULE *match_rule;
385  DICT_PCRE_IF_RULE *if_rule;
386 
387  for (rule = dict_pcre->head; rule; rule = next) {
388  next = rule->next;
389  switch (rule->op) {
390  case DICT_PCRE_OP_MATCH:
391  match_rule = (DICT_PCRE_MATCH_RULE *) rule;
392  if (match_rule->pattern)
393  myfree((void *) match_rule->pattern);
394  if (match_rule->hints)
395  DICT_PCRE_FREE_STUDY(match_rule->hints);
396  if (match_rule->replacement)
397  myfree((void *) match_rule->replacement);
398  break;
399  case DICT_PCRE_OP_IF:
400  if_rule = (DICT_PCRE_IF_RULE *) rule;
401  if (if_rule->pattern)
402  myfree((void *) if_rule->pattern);
403  if (if_rule->hints)
404  DICT_PCRE_FREE_STUDY(if_rule->hints);
405  break;
406  case DICT_PCRE_OP_ENDIF:
407  break;
408  default:
409  msg_panic("dict_pcre_close: unknown operation %d", rule->op);
410  }
411  myfree((void *) rule);
412  }
413  if (dict_pcre->expansion_buf)
414  vstring_free(dict_pcre->expansion_buf);
415  if (dict->fold_buf)
416  vstring_free(dict->fold_buf);
417  dict_free(dict);
418 }
419 
420 /* dict_pcre_get_pattern - extract pattern from rule */
421 
422 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
423  DICT_PCRE_REGEXP *pattern)
424 {
425  char *p = *bufp;
426  char re_delimiter;
427 
428  /*
429  * Process negation operators.
430  */
431  pattern->match = 1;
432  for (;;) {
433  if (*p == '!')
434  pattern->match = !pattern->match;
435  else if (!ISSPACE(*p))
436  break;
437  p++;
438  }
439  if (*p == 0) {
440  msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
441  mapname, lineno);
442  return (0);
443  }
444  re_delimiter = *p++;
445  pattern->regexp = p;
446 
447  /*
448  * Search for second delimiter, handling backslash escape.
449  */
450  while (*p) {
451  if (*p == '\\') {
452  ++p;
453  if (*p == 0)
454  break;
455  } else if (*p == re_delimiter)
456  break;
457  ++p;
458  }
459 
460  if (!*p) {
461  msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
462  "ignoring this rule", mapname, lineno, re_delimiter);
463  return (0);
464  }
465  *p++ = 0; /* Null term the regexp */
466 
467  /*
468  * Parse any regexp options.
469  */
470  pattern->options = PCRE_CASELESS | PCRE_DOTALL;
471  while (*p && !ISSPACE(*p)) {
472  switch (*p) {
473  case 'i':
474  pattern->options ^= PCRE_CASELESS;
475  break;
476  case 'm':
477  pattern->options ^= PCRE_MULTILINE;
478  break;
479  case 's':
480  pattern->options ^= PCRE_DOTALL;
481  break;
482  case 'x':
483  pattern->options ^= PCRE_EXTENDED;
484  break;
485  case 'A':
486  pattern->options ^= PCRE_ANCHORED;
487  break;
488  case 'E':
489  pattern->options ^= PCRE_DOLLAR_ENDONLY;
490  break;
491  case 'U':
492  pattern->options ^= PCRE_UNGREEDY;
493  break;
494  case 'X':
495  pattern->options ^= PCRE_EXTRA;
496  break;
497  default:
498  msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
499  "skipping this rule", mapname, lineno, *p);
500  return (0);
501  }
502  ++p;
503  }
504  *bufp = p;
505  return (1);
506 }
507 
508 /* dict_pcre_prescan - sanity check $number instances in replacement text */
509 
510 static int dict_pcre_prescan(int type, VSTRING *buf, void *context)
511 {
512  DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
513  size_t n;
514 
515  /*
516  * Keep a copy of literal text (with $$ already replaced by $) if and
517  * only if the replacement text contains no $number expression. This way
518  * we can avoid having to scan the replacement text at lookup time.
519  */
520  if (type == MAC_PARSE_VARNAME) {
521  if (ctxt->literal) {
522  myfree(ctxt->literal);
523  ctxt->literal = 0;
524  }
525  if (!alldig(vstring_str(buf))) {
526  msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
527  ctxt->mapname, ctxt->lineno, vstring_str(buf));
528  return (MAC_PARSE_ERROR);
529  }
530  n = atoi(vstring_str(buf));
531  if (n < 1) {
532  msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
533  ctxt->mapname, ctxt->lineno, vstring_str(buf));
534  return (MAC_PARSE_ERROR);
535  }
536  if (n > ctxt->max_sub)
537  ctxt->max_sub = n;
538  } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
539  if (ctxt->literal)
540  msg_panic("pcre map %s, line %d: multiple literals but no $number",
541  ctxt->mapname, ctxt->lineno);
542  ctxt->literal = mystrdup(vstring_str(buf));
543  }
544  return (MAC_PARSE_OK);
545 }
546 
547 /* dict_pcre_compile - compile pattern */
548 
549 static int dict_pcre_compile(const char *mapname, int lineno,
550  DICT_PCRE_REGEXP *pattern,
551  DICT_PCRE_ENGINE *engine)
552 {
553  const char *error;
554  int errptr;
555 
556  engine->pattern = pcre_compile(pattern->regexp, pattern->options,
557  &error, &errptr, NULL);
558  if (engine->pattern == 0) {
559  msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
560  mapname, lineno, errptr, error);
561  return (0);
562  }
563  engine->hints = pcre_study(engine->pattern, 0, &error);
564  if (error != 0) {
565  msg_warn("pcre map %s, line %d: error while studying regex: %s",
566  mapname, lineno, error);
567  myfree((void *) engine->pattern);
568  return (0);
569  }
570  return (1);
571 }
572 
573 /* dict_pcre_rule_alloc - fill in a generic rule structure */
574 
575 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int lineno, size_t size)
576 {
577  DICT_PCRE_RULE *rule;
578 
579  rule = (DICT_PCRE_RULE *) mymalloc(size);
580  rule->op = op;
581  rule->lineno = lineno;
582  rule->next = 0;
583 
584  return (rule);
585 }
586 
587 /* dict_pcre_parse_rule - parse and compile one rule */
/*
 * Parse one logical input line and return a newly allocated rule:
 * a match rule ("/pattern/flags replacement"), an IF rule or an ENDIF rule.
 * Returns 0, after logging a warning, when the line is skipped.
 *
 * "line" is modified in place while the pattern is extracted. "nesting" is
 * the caller's current IF depth and is only used to reject an ENDIF that
 * has no IF. Of "dict_flags", only DICT_FLAG_NO_REGSUB is examined here.
 * The caller links the rule into the rule list and pairs IF with ENDIF.
 */
588 
589 static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno,
590  char *line, int nesting,
591  int dict_flags)
592 {
593  char *p;
594  int actual_sub;
595 
596  p = line;
597 
598  /*
599  * An ordinary match rule takes one pattern and replacement text. Any
 * line that does not start with an alphanumeric character is handled
 * as a match rule.
600  */
601  if (!ISALNUM(*p)) {
602  DICT_PCRE_REGEXP regexp;
603  DICT_PCRE_ENGINE engine;
604  DICT_PCRE_PRESCAN_CONTEXT prescan_context;
605  DICT_PCRE_MATCH_RULE *match_rule;
606 
607  /*
608  * Get the pattern string and options.
609  */
610  if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
611  return (0);
612 
613  /*
614  * Get the replacement text. An empty replacement is accepted with a
 * warning.
615  */
616  while (*p && ISSPACE(*p))
617  ++p;
618  if (!*p)
619  msg_warn("pcre map %s, line %d: no replacement text: "
620  "using empty string", mapname, lineno);
621 
622  /*
623  * Sanity check the $number instances in the replacement text.
624  */
625  prescan_context.mapname = mapname;
626  prescan_context.lineno = lineno;
627  prescan_context.max_sub = 0;
628  prescan_context.literal = 0;
629 
630  /*
631  * The optimizer will eliminate code duplication and/or dead code.
 * This macro releases the literal copy made by the pre-scan, if
 * any, before returning from the function.
632  */
633 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
634  if (prescan_context.literal) \
635  myfree(prescan_context.literal); \
636  return (rval); \
637  } while (0)
638 
639  if (mac_parse(p, dict_pcre_prescan, (void *) &prescan_context)
640  & MAC_PARSE_ERROR) {
641  msg_warn("pcre map %s, line %d: bad replacement syntax: "
642  "skipping this rule", mapname, lineno);
643  CREATE_MATCHOP_ERROR_RETURN(0);
644  }
645 
646  /*
647  * Substring replacement not possible with negative regexps.
648  */
649  if (prescan_context.max_sub > 0 && regexp.match == 0) {
650  msg_warn("pcre map %s, line %d: $number found in negative match "
651  "replacement text: skipping this rule", mapname, lineno);
652  CREATE_MATCHOP_ERROR_RETURN(0);
653  }
 /* The caller may forbid $number substitution altogether. */
654  if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
655  msg_warn("pcre map %s, line %d: "
656  "regular expression substitution is not allowed: "
657  "skipping this rule", mapname, lineno);
658  CREATE_MATCHOP_ERROR_RETURN(0);
659  }
660 
661  /*
662  * Compile the pattern.
663  */
664  if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
665  CREATE_MATCHOP_ERROR_RETURN(0);
666 #ifdef PCRE_INFO_CAPTURECOUNT
 /*
 * Reject replacement text that refers to more substrings than the
 * pattern captures. The compiled pattern and study data are released
 * here because they have not been handed to a rule yet.
 */
667  if (pcre_fullinfo(engine.pattern, engine.hints,
668  PCRE_INFO_CAPTURECOUNT,
669  (void *) &actual_sub) != 0)
670  msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
671  mapname, lineno);
672  if (prescan_context.max_sub > actual_sub) {
673  msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": "
674  "skipping this rule", mapname, lineno,
675  (int) prescan_context.max_sub);
676  if (engine.pattern)
677  myfree((void *) engine.pattern);
678  if (engine.hints)
679  DICT_PCRE_FREE_STUDY(engine.hints);
680  CREATE_MATCHOP_ERROR_RETURN(0);
681  }
682 #endif
683 
684  /*
685  * Save the result. The rule takes over the pre-scanned literal when
 * there is one (no $number in the replacement), otherwise a copy of
 * the raw replacement text, plus the compiled pattern and study data.
686  */
687  match_rule = (DICT_PCRE_MATCH_RULE *)
688  dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, lineno,
689  sizeof(DICT_PCRE_MATCH_RULE));
690  match_rule->match = regexp.match;
691  match_rule->max_sub = prescan_context.max_sub;
692  if (prescan_context.literal)
693  match_rule->replacement = prescan_context.literal;
694  else
695  match_rule->replacement = mystrdup(p);
696  match_rule->pattern = engine.pattern;
697  match_rule->hints = engine.hints;
698  return ((DICT_PCRE_RULE *) match_rule);
699  }
700 
701  /*
702  * The IF operator takes one pattern but no replacement text. The
 * keyword is matched without regard to case.
703  */
704  else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
705  DICT_PCRE_REGEXP regexp;
706  DICT_PCRE_ENGINE engine;
707  DICT_PCRE_IF_RULE *if_rule;
708 
709  p += 2;
710 
711  /*
712  * Get the pattern.
713  */
714  while (*p && ISSPACE(*p))
715  p++;
716  if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
717  return (0);
718 
719  /*
720  * Warn about out-of-place text.
721  */
722  while (*p && ISSPACE(*p))
723  ++p;
724  if (*p) {
725  msg_warn("pcre map %s, line %d: ignoring extra text after "
726  "IF statement: \"%s\"", mapname, lineno, p);
727  msg_warn("pcre map %s, line %d: do not prepend whitespace"
728  " to statements between IF and ENDIF", mapname, lineno);
729  }
730 
731  /*
732  * Compile the pattern.
733  */
734  if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
735  return (0);
736 
737  /*
738  * Save the result. The endif_rule link is filled in by the caller
 * when the matching ENDIF is seen.
739  */
740  if_rule = (DICT_PCRE_IF_RULE *)
741  dict_pcre_rule_alloc(DICT_PCRE_OP_IF, lineno,
742  sizeof(DICT_PCRE_IF_RULE));
743  if_rule->match = regexp.match;
744  if_rule->pattern = engine.pattern;
745  if_rule->hints = engine.hints;
746  if_rule->endif_rule = 0;
747  return ((DICT_PCRE_RULE *) if_rule);
748  }
749 
750  /*
751  * The ENDIF operator takes no patterns and no replacement text.
752  */
753  else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
754  DICT_PCRE_RULE *rule;
755 
756  p += 5;
757 
758  /*
759  * Warn about out-of-place ENDIFs.
760  */
761  if (nesting == 0) {
762  msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
763  mapname, lineno);
764  return (0);
765  }
766 
767  /*
768  * Warn about out-of-place text.
769  */
770  while (*p && ISSPACE(*p))
771  ++p;
772  if (*p)
773  msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
774  mapname, lineno);
775 
776  /*
777  * Save the result. An ENDIF needs only the generic rule members.
778  */
779  rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, lineno,
780  sizeof(DICT_PCRE_RULE));
781  return (rule);
782  }
783 
784  /*
785  * Unrecognized input.
786  */
787  else {
788  msg_warn("pcre map %s, line %d: ignoring unrecognized request",
789  mapname, lineno);
790  return (0);
791  }
792 }
793 
794 /* dict_pcre_open - load and compile a file containing regular expressions */
/*
 * Open the table file "mapname", compile every rule in it and return a
 * dictionary handle whose lookup and close methods are dict_pcre_lookup()
 * and dict_pcre_close(). "open_flags" must be O_RDONLY. When the access
 * mode is wrong or the file cannot be opened, the result of
 * dict_surrogate() is returned instead. Lines that fail to parse are
 * skipped with a warning; they do not make the open fail.
 */
795 
796 DICT *dict_pcre_open(const char *mapname, int open_flags, int dict_flags)
797 {
798  const char myname[] = "dict_pcre_open";
799  DICT_PCRE *dict_pcre;
800  VSTREAM *map_fp = 0;
801  struct stat st;
802  VSTRING *line_buffer = 0;
803  DICT_PCRE_RULE *last_rule = 0;
804  DICT_PCRE_RULE *rule;
805  int last_line = 0;
806  int lineno;
807  int nesting = 0;
808  char *p;
809  DICT_PCRE_RULE **rule_stack = 0;
810  MVECT mvect;
811 
812  /*
813  * Let the optimizer worry about eliminating redundant code. This macro
 * evaluates its argument first, then closes the stream and releases the
 * line buffer if they exist, and returns the argument's value.
814  */
815 #define DICT_PCRE_OPEN_RETURN(d) do { \
816  DICT *__d = (d); \
817  if (map_fp != 0) \
818  vstream_fclose(map_fp); \
819  if (line_buffer != 0) \
820  vstring_free(line_buffer); \
821  return (__d); \
822  } while (0)
823 
824  /*
825  * Sanity checks.
826  */
827  if (open_flags != O_RDONLY)
828  DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
829  open_flags, dict_flags,
830  "%s:%s map requires O_RDONLY access mode",
831  DICT_TYPE_PCRE, mapname));
832 
833  /*
834  * Open the configuration file. An fstat() failure on the open file is
 * fatal.
835  */
836  if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
837  DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
838  open_flags, dict_flags,
839  "open %s: %m", mapname));
840  if (fstat(vstream_fileno(map_fp), &st) < 0)
841  msg_fatal("fstat %s: %m", mapname);
842 
843  line_buffer = vstring_alloc(100);
844 
 /*
 * Set up the dictionary handle. The case-folding buffer is allocated up
 * front only when key folding was requested.
 */
845  dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
846  sizeof(*dict_pcre));
847  dict_pcre->dict.lookup = dict_pcre_lookup;
848  dict_pcre->dict.close = dict_pcre_close;
849  dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
850  if (dict_flags & DICT_FLAG_FOLD_MUL)
851  dict_pcre->dict.fold_buf = vstring_alloc(10);
852  dict_pcre->head = 0;
853  dict_pcre->expansion_buf = 0;
854 
 /*
 * One-time setup: route PCRE memory management through mymalloc() and
 * myfree(). Other code in this file releases compiled patterns with
 * myfree().
 */
855  if (dict_pcre_init == 0) {
856  pcre_malloc = (void *(*) (size_t)) mymalloc;
857  pcre_free = (void (*) (void *)) myfree;
858  dict_pcre_init = 1;
859  }
 /* Record the file owner; status is non-zero unless the owner is uid 0. */
860  dict_pcre->dict.owner.uid = st.st_uid;
861  dict_pcre->dict.owner.status = (st.st_uid != 0);
862 
863  /*
864  * Parse the pcre table. Each parsed rule is appended to the rule list.
 * rule_stack holds the currently open IF rules, indexed by nesting
 * depth, so that each ENDIF can be linked to its IF.
865  */
866  while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
867  p = vstring_str(line_buffer);
868  trimblanks(p, 0)[0] = 0; /* Trim space at end */
869  if (*p == 0)
870  continue;
871  rule = dict_pcre_parse_rule(mapname, lineno, p, nesting, dict_flags);
872  if (rule == 0)
873  continue;
874  if (rule->op == DICT_PCRE_OP_IF) {
 /* Push the IF rule, growing the stack by one slot. */
875  if (rule_stack == 0)
876  rule_stack = (DICT_PCRE_RULE **) mvect_alloc(&mvect,
877  sizeof(*rule_stack), nesting + 1,
878  (MVECT_FN) 0, (MVECT_FN) 0);
879  else
880  rule_stack =
881  (DICT_PCRE_RULE **) mvect_realloc(&mvect, nesting + 1);
882  rule_stack[nesting] = rule;
883  nesting++;
884  } else if (rule->op == DICT_PCRE_OP_ENDIF) {
885  DICT_PCRE_IF_RULE *if_rule;
886 
 /* Pop the innermost open IF and link it to this ENDIF. */
887  if (nesting-- <= 0)
888  /* Already handled in dict_pcre_parse_rule(). */
889  msg_panic("%s: ENDIF without IF", myname);
890  if (rule_stack[nesting]->op != DICT_PCRE_OP_IF)
891  msg_panic("%s: unexpected rule stack element type %d",
892  myname, rule_stack[nesting]->op);
893  if_rule = (DICT_PCRE_IF_RULE *) rule_stack[nesting];
894  if_rule->endif_rule = rule;
895  }
 /* Append the rule to the end of the list. */
896  if (last_rule == 0)
897  dict_pcre->head = rule;
898  else
899  last_rule->next = rule;
900  last_rule = rule;
901  }
902 
 /*
 * Report every IF that is still open at end of file. Such rules keep a
 * null endif_rule.
 */
903  while (nesting-- > 0)
904  msg_warn("pcre map %s, line %d: IF has no matching ENDIF",
905  mapname, rule_stack[nesting]->lineno);
906 
907  if (rule_stack)
908  (void) mvect_free(&mvect);
909 
910  DICT_PCRE_OPEN_RETURN(DICT_DEBUG (&dict_pcre->dict));
911 }
912 
913 #endif /* HAS_PCRE */
int msg_verbose
Definition: msg.c:177
void myfree(void *ptr)
Definition: mymalloc.c:207
char * mystrdup(const char *str)
Definition: mymalloc.c:225
Definition: mvect.h:19
NORETURN msg_panic(const char *fmt,...)
Definition: msg.c:295
#define vstring_str(vp)
Definition: vstring.h:71
char * name
Definition: dict.h:80
#define stat(p, s)
Definition: warn_stat.h:18
int flags
Definition: dict.h:81
int alldig(const char *string)
Definition: alldig.c:38
int strncasecmp(const char *s1, const char *s2, size_t n)
Definition: strcasecmp.c:52
VSTRING * vstring_strcpy(VSTRING *vp, const char *src)
Definition: vstring.c:431
VSTREAM * vstream_fopen(const char *path, int flags, mode_t mode)
Definition: vstream.c:1241
#define VSTRING_TERMINATE(vp)
Definition: vstring.h:74
#define ISALNUM(c)
Definition: sys_defs.h:1745
Definition: dict.h:78
#define VSTRING_RESET(vp)
Definition: vstring.h:77
void msg_warn(const char *fmt,...)
Definition: msg.c:215
VSTRING * vstring_alloc(ssize_t len)
Definition: vstring.c:353
int error
Definition: dict.h:94
char * lowercase(char *string)
Definition: lowercase.c:34
const char *(* lookup)(struct DICT *, const char *)
Definition: dict.h:82
char * mvect_free(MVECT *vect)
Definition: mvect.c:111
char * trimblanks(char *, ssize_t)
Definition: trimblanks.c:37
NORETURN msg_fatal(const char *fmt,...)
Definition: msg.c:249
char * mvect_alloc(MVECT *vect, ssize_t elsize, ssize_t nelm, void(*init_fn)(char *, ssize_t), void(*wipe_fn)(char *, ssize_t))
Definition: mvect.c:75
VSTRING * readllines(VSTRING *buf, VSTREAM *fp, int *lineno, int *first_line)
Definition: readlline.c:82
#define DICT_FLAG_PATTERN
Definition: dict.h:115
void(* MVECT_FN)(char *, ssize_t)
Definition: mvect.h:17
int mac_parse(const char *value, MAC_PARSE_FN action, void *context)
Definition: mac_parse.c:85
void dict_free(DICT *)
Definition: dict_alloc.c:163
char * mvect_realloc(MVECT *vect, ssize_t nelm)
Definition: mvect.c:91
DICT * dict_pcre_open(const char *, int, int)
VSTRING * vstring_free(VSTRING *vp)
Definition: vstring.c:380
#define DICT_FLAG_NO_REGSUB
Definition: dict.h:121
#define MAC_PARSE_OK
Definition: mac_parse.h:26
#define MAC_PARSE_UNDEF
Definition: mac_parse.h:28
#define DICT_FLAG_FOLD_MUL
Definition: dict.h:125
#define vstream_fileno(vp)
Definition: vstream.h:115
#define MAC_PARSE_VARNAME
Definition: mac_parse.h:24
#define ISSPACE(c)
Definition: sys_defs.h:1753
DICT * dict_alloc(const char *, const char *, ssize_t)
Definition: dict_alloc.c:135
VSTRING * fold_buf
Definition: dict.h:92
#define MAC_PARSE_LITERAL
Definition: mac_parse.h:22
#define DICT_TYPE_PCRE
Definition: dict_pcre.h:22
VSTRING * vstring_strcat(VSTRING *vp, const char *src)
Definition: vstring.c:459
DICT * dict_surrogate(const char *dict_type, const char *dict_name, int open_flags, int dict_flags, const char *fmt,...)
#define fstat(f, s)
Definition: warn_stat.h:20
#define MAC_PARSE_ERROR
Definition: mac_parse.h:27
void * mymalloc(ssize_t len)
Definition: mymalloc.c:150
void msg_info(const char *fmt,...)
Definition: msg.c:199