Postfix3.3.1
midna_domain.c
[詳解]
1 /*++
2 /* NAME
3 /* midna_domain 3
4 /* SUMMARY
5 /* ASCII/UTF-8 domain name conversion
6 /* SYNOPSIS
7 /* #include <midna_domain.h>
8 /*
9 /* int midna_domain_cache_size;
10 /* int midna_domain_transitional;
11 /*
12 /* const char *midna_domain_to_ascii(
13 /* const char *name)
14 /*
15 /* const char *midna_domain_to_utf8(
16 /* const char *name)
17 /*
18 /* const char *midna_domain_suffix_to_ascii(
19 /* const char *name)
20 /*
21 /* const char *midna_domain_suffix_to_utf8(
22 /* const char *name)
23 /* DESCRIPTION
24 /* The functions in this module transform domain names from/to
25 /* ASCII and UTF-8 form. The result is cached to avoid repeated
26 /* conversion.
27 /*
28 /* This module builds on the ICU library implementation of the
29 /* UTS #46 specification, using default ICU library options
30 /* because those are likely best tested: with transitional
31 /* processing, with case mapping, with normalization, with
32 /* limited IDNA2003 compatibility, without STD3 ASCII rules.
33 /*
34 /* midna_domain_to_ascii() converts a UTF-8 or ASCII domain
35 /* name to ASCII. The result is a null pointer in case of
36 /* error. This function verifies that the result passes
37 /* valid_hostname().
38 /*
39 /* midna_domain_to_utf8() converts a UTF-8 or ASCII domain
40 /* name to UTF-8. The result is a null pointer in case of
41 /* error. This function verifies that the result, after
42 /* conversion to ASCII, passes valid_hostname().
43 /*
44 /* midna_domain_suffix_to_ascii() and midna_domain_suffix_to_utf8()
45 /* take a name that starts with '.' and otherwise perform the
46 /* same operations as midna_domain_to_ascii() and
47 /* midna_domain_to_utf8().
48 /*
49 /* midna_domain_cache_size specifies the size of the conversion
50 /* result cache. This value is used only once, upon the first
51 /* lookup request.
52 /*
53 /* midna_domain_transitional enables transitional conversion
54 /* between UTF8 and ASCII labels.
55 /* SEE ALSO
56 /* http://unicode.org/reports/tr46/ Unicode IDNA Compatibility processing
57 /* msg(3) diagnostics interface
58 /* DIAGNOSTICS
59 /* Fatal errors: memory allocation problem.
60 /* Warnings: conversion error or result validation error.
61 /* LICENSE
62 /* .ad
63 /* .fi
64 /* The Secure Mailer license must be distributed with this software.
65 /* AUTHOR(S)
66 /* Arnt Gulbrandsen
67 /*
68 /* Wietse Venema
69 /* IBM T.J. Watson Research
70 /* P.O. Box 704
71 /* Yorktown Heights, NY 10598, USA
72 /*
73 /* Wietse Venema
74 /* Google, Inc.
75 /* 111 8th Avenue
76 /* New York, NY 10011, USA
77 /*--*/
78 
79  /*
80  * System library.
81  */
82 #include <sys_defs.h>
83 #include <string.h>
84 #include <ctype.h>
85 
86 #ifndef NO_EAI
87 #include <unicode/uidna.h>
88 
89  /*
90  * Utility library.
91  */
92 #include <mymalloc.h>
93 #include <msg.h>
94 #include <ctable.h>
95 #include <stringops.h>
96 #include <valid_hostname.h>
97 #include <name_mask.h>
98 #include <midna_domain.h>
99 
100  /*
101  * Application-specific.
102  */
103 #define DEF_MIDNA_CACHE_SIZE 256
104 
107 static VSTRING *midna_domain_buf; /* x.suffix */
108 
109 #define STR(x) vstring_str(x)
110 
111 /* midna_domain_strerror - pick one for error reporting */
112 
113 static const char *midna_domain_strerror(UErrorCode error, int info_errors)
114 {
115 
116  /*
117  * XXX The UIDNA_ERROR_EMPTY_LABEL etc. names are defined in an ENUM, so
118  * we can't use #ifdef to dynamically determine which names exist.
119  */
120  static LONG_NAME_MASK uidna_errors[] = {
121  "UIDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL,
122  "UIDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG,
123  "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG,
124  "UIDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN,
125  "UIDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN,
126  "UIDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4,
127  "UIDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK,
128  "UIDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED,
129  "UIDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE,
130  "UIDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT,
131  "UIDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL,
132  "UIDNA_ERROR_BIDI", UIDNA_ERROR_BIDI,
133  "UIDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ,
134  /* The above errors are defined with ICU 46 and later. */
135  0,
136  };
137 
138  if (info_errors) {
139  return (str_long_name_mask_opt((VSTRING *) 0, "idna error",
140  uidna_errors, info_errors,
142  } else {
143  return u_errorName(error);
144  }
145 }
146 
147 /* midna_domain_to_ascii_create - convert domain to ASCII */
148 
149 static void *midna_domain_to_ascii_create(const char *name, void *unused_context)
150 {
151  static const char myname[] = "midna_domain_to_ascii_create";
152  char buf[1024]; /* XXX */
153  UErrorCode error = U_ZERO_ERROR;
154  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
155  UIDNA *idna;
156  int anl;
157 
158  /*
159  * Paranoia: do not expose uidna_*() to unfiltered network data.
160  */
161  if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
162  msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
163  myname, name, "malformed UTF-8");
164  return (0);
165  }
166 
167  /*
168  * Perform the requested conversion.
169  */
170  idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
171  : UIDNA_NONTRANSITIONAL_TO_ASCII, &error);
172  anl = uidna_nameToASCII_UTF8(idna,
173  name, strlen(name),
174  buf, sizeof(buf) - 1,
175  &info,
176  &error);
177  uidna_close(idna);
178 
179  /*
180  * Paranoia: verify that the result passes valid_hostname(). A quick
181  * check shows that UTS46 ToASCII by default rejects inputs with labels
182  * that start or end in '-', with names or labels that are over-long, or
183  * "fake" A-labels, as required by UTS 46 section 4.1, but we rely on
184  * valid_hostname() on the output side just to be sure.
185  */
186  if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
187  buf[anl] = 0; /* XXX */
188  if (!valid_hostname(buf, DONT_GRIPE)) {
189  msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
190  myname, name, "malformed ASCII label(s)");
191  return (0);
192  }
193  return (mystrndup(buf, anl));
194  } else {
195  msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
196  myname, name, midna_domain_strerror(error, info.errors));
197  return (0);
198  }
199 }
200 
201 /* midna_domain_to_utf8_create - convert domain to UTF8 */
202 
203 static void *midna_domain_to_utf8_create(const char *name, void *unused_context)
204 {
205  static const char myname[] = "midna_domain_to_utf8_create";
206  char buf[1024]; /* XXX */
207  UErrorCode error = U_ZERO_ERROR;
208  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
209  UIDNA *idna;
210  int anl;
211 
212  /*
213  * Paranoia: do not expose uidna_*() to unfiltered network data.
214  */
215  if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
216  msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s",
217  myname, name, "malformed UTF-8");
218  return (0);
219  }
220 
221  /*
222  * Perform the requested conversion.
223  */
224  idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
225  : UIDNA_NONTRANSITIONAL_TO_UNICODE, &error);
226  anl = uidna_nameToUnicodeUTF8(idna,
227  name, strlen(name),
228  buf, sizeof(buf) - 1,
229  &info,
230  &error);
231  uidna_close(idna);
232 
233  /*
234  * Paranoia: UTS46 toUTF8 by default accepts and produces an over-long
235  * name or a name that contains an over-long NR-LDH label (and perhaps
236  * other invalid forms that are not covered in UTS 46, section 4.1). We
237  * rely on midna_domain_to_ascii() to validate the output.
238  */
239  if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
240  buf[anl] = 0; /* XXX */
241  if (midna_domain_to_ascii(buf) == 0)
242  return (0);
243  return (mystrndup(buf, anl));
244  } else {
245  msg_warn("%s: Problem translating domain \"%.100s\" to UTF8 form: %s",
246  myname, name, midna_domain_strerror(error, info.errors));
247  return (0);
248  }
249 }
250 
/* midna_domain_cache_free - cache delete callback: release one result */

static void midna_domain_cache_free(void *value, void *unused_context)
{

    /*
     * Guard against a null value: the create callbacks in this file return
     * 0 when a conversion fails.
     */
    if (value == 0)
        return;
    myfree(value);
}
258 
259 /* midna_domain_to_ascii - convert name to ASCII */
260 
261 const char *midna_domain_to_ascii(const char *name)
262 {
263  static CTABLE *midna_domain_to_ascii_cache = 0;
264 
265  if (midna_domain_to_ascii_cache == 0)
266  midna_domain_to_ascii_cache = ctable_create(midna_domain_cache_size,
267  midna_domain_to_ascii_create,
268  midna_domain_cache_free,
269  (void *) 0);
270  return (ctable_locate(midna_domain_to_ascii_cache, name));
271 }
272 
273 /* midna_domain_to_utf8 - convert name to UTF8 */
274 
275 const char *midna_domain_to_utf8(const char *name)
276 {
277  static CTABLE *midna_domain_to_utf8_cache = 0;
278 
279  if (midna_domain_to_utf8_cache == 0)
280  midna_domain_to_utf8_cache = ctable_create(midna_domain_cache_size,
281  midna_domain_to_utf8_create,
282  midna_domain_cache_free,
283  (void *) 0);
284  return (ctable_locate(midna_domain_to_utf8_cache, name));
285 }
286 
287 /* midna_domain_suffix_to_ascii - convert .name to ASCII */
288 
289 const char *midna_domain_suffix_to_ascii(const char *suffix)
290 {
291  const char *cache_res;
292 
293  /*
294  * If prepending x to .name causes the result to become too long, then
295  * the suffix is bad.
296  */
297  if (midna_domain_buf == 0)
298  midna_domain_buf = vstring_alloc(100);
299  vstring_sprintf(midna_domain_buf, "x%s", suffix);
300  if ((cache_res = midna_domain_to_ascii(STR(midna_domain_buf))) == 0)
301  return (0);
302  else
303  return (cache_res + 1);
304 }
305 
306 /* midna_domain_suffix_to_utf8 - convert .name to UTF8 */
307 
308 const char *midna_domain_suffix_to_utf8(const char *name)
309 {
310  const char *cache_res;
311 
312  /*
313  * If prepending x to .name causes the result to become too long, then
314  * the suffix is bad.
315  */
316  if (midna_domain_buf == 0)
317  midna_domain_buf = vstring_alloc(100);
318  vstring_sprintf(midna_domain_buf, "x%s", name);
319  if ((cache_res = midna_domain_to_utf8(STR(midna_domain_buf))) == 0)
320  return (0);
321  else
322  return (cache_res + 1);
323 }
324 
325 #ifdef TEST
326 
327  /*
328  * Test program - reads names from stdin, reports invalid names to stderr.
329  */
330 #include <stdlib.h>
331 #include <locale.h>
332 
333 #include <stringops.h> /* XXX util_utf8_enable */
334 #include <vstring.h>
335 #include <vstream.h>
336 #include <vstring_vstream.h>
337 #include <msg_vstream.h>
338 
339 int main(int argc, char **argv)
340 {
341  VSTRING *buffer = vstring_alloc(1);
342  const char *bp;
343  const char *ascii;
344  const char *utf8;
345 
346  if (setlocale(LC_ALL, "C") == 0)
347  msg_fatal("setlocale(LC_ALL, C) failed: %m");
348 
349  msg_vstream_init(argv[0], VSTREAM_ERR);
350  /* msg_verbose = 1; */
351  util_utf8_enable = 1;
352 
353  while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
354  bp = STR(buffer);
355  msg_info("> %s", bp);
356  while (ISSPACE(*bp))
357  bp++;
358  if (*bp == '#' || *bp == 0)
359  continue;
360  msg_info("unconditional conversions:");
361  utf8 = midna_domain_to_utf8(bp);
362  msg_info("\"%s\" ->utf8 \"%s\"", bp, utf8 ? utf8 : "(error)");
363  ascii = midna_domain_to_ascii(bp);
364  msg_info("\"%s\" ->ascii \"%s\"", bp, ascii ? ascii : "(error)");
365  msg_info("conditional conversions:");
366  if (!allascii(bp)) {
367  if (ascii != 0) {
368  utf8 = midna_domain_to_utf8(ascii);
369  msg_info("\"%s\" ->ascii \"%s\" ->utf8 \"%s\"",
370  bp, ascii, utf8 ? utf8 : "(error)");
371  if (utf8 != 0) {
372  if (strcmp(utf8, bp) != 0)
373  msg_warn("\"%s\" != \"%s\"", bp, utf8);
374  }
375  }
376  } else {
377  if (utf8 != 0) {
378  ascii = midna_domain_to_ascii(utf8);
379  msg_info("\"%s\" ->utf8 \"%s\" ->ascii \"%s\"",
380  bp, utf8, ascii ? ascii : "(error)");
381  if (ascii != 0) {
382  if (strcmp(ascii, bp) != 0)
383  msg_warn("\"%s\" != \"%s\"", bp, ascii);
384  }
385  }
386  }
387  }
388  exit(0);
389 }
390 
391 #endif /* TEST */
392 
393 #endif /* NO_EAI */
#define vstring_fgets_nonl(s, p)
int midna_domain_transitional
Definition: midna_domain.c:106
void myfree(void *ptr)
Definition: mymalloc.c:207
#define CTABLE
Definition: ctable.h:19
const char * str_long_name_mask_opt(VSTRING *buf, const char *context, const LONG_NAME_MASK *table, long mask, int flags)
Definition: name_mask.c:380
int main(int argc, char **argv)
Definition: anvil.c:1010
int valid_utf8_string(const char *, ssize_t)
const void * ctable_locate(CTABLE *cache, const char *key)
Definition: ctable.c:140
#define VSTREAM_IN
Definition: vstream.h:66
int valid_hostname(const char *name, int gripe)
#define STR(x)
Definition: midna_domain.c:109
const char * midna_domain_to_utf8(const char *name)
Definition: midna_domain.c:275
int midna_domain_cache_size
Definition: midna_domain.c:105
void msg_warn(const char *fmt,...)
Definition: msg.c:215
const char * midna_domain_suffix_to_utf8(const char *name)
Definition: midna_domain.c:308
CTABLE * ctable_create(ssize_t limit, CTABLE_CREATE_FN create, CTABLE_DELETE_FN delete, void *context)
Definition: ctable.c:119
VSTRING * vstring_alloc(ssize_t len)
Definition: vstring.c:353
VSTRING * vstring_sprintf(VSTRING *vp, const char *format,...)
Definition: vstring.c:602
#define allascii(s)
Definition: stringops.h:66
NORETURN msg_fatal(const char *fmt,...)
Definition: msg.c:249
char * mystrndup(const char *str, ssize_t len)
Definition: mymalloc.c:242
const char * midna_domain_suffix_to_ascii(const char *suffix)
Definition: midna_domain.c:289
const char * midna_domain_to_ascii(const char *name)
Definition: midna_domain.c:261
#define NAME_MASK_NUMBER
Definition: name_mask.h:32
#define DEF_MIDNA_CACHE_SIZE
Definition: midna_domain.c:103
void msg_vstream_init(const char *name, VSTREAM *vp)
Definition: msg_vstream.c:77
#define ISSPACE(c)
Definition: sys_defs.h:1753
int util_utf8_enable
Definition: printable.c:47
#define NAME_MASK_COMMA
Definition: name_mask.h:30
#define VSTREAM_ERR
Definition: vstream.h:68
#define DONT_GRIPE
Definition: haproxy_srvr.h:31
void msg_info(const char *fmt,...)
Definition: msg.c:199