Postfix3.3.1
uxtext.c
[詳解]
1 /*++
2 /* NAME
3 /* uxtext 3
4 /* SUMMARY
5 /* quote/unquote text, xtext style.
6 /* SYNOPSIS
7 /* #include <uxtext.h>
8 /*
9 /* VSTRING *uxtext_quote(quoted, unquoted, special)
10 /* VSTRING *quoted;
11 /* const char *unquoted;
12 /* const char *special;
13 /*
14 /* VSTRING *uxtext_quote_append(quoted, unquoted, special)
15 /* VSTRING *quoted;
16 /* const char *unquoted;
17 /* const char *special;
18 /*
19 /* VSTRING *uxtext_unquote(unquoted, quoted)
20 /* VSTRING *unquoted;
21 /* const char *quoted;
22 /*
23 /* VSTRING *uxtext_unquote_append(unquoted, quoted)
24 /* VSTRING *unquoted;
25 /* const char *quoted;
26 /* DESCRIPTION
27 /* uxtext_quote() takes a null-terminated UTF8 string and
28 /* replaces characters \, <33(10) and >126(10), as well as
29 /* characters specified with "special" with \x{XX}, XX being
30 /* a 2-6-digit uppercase hexadecimal equivalent.
31 /*
32 /* uxtext_quote_append() is like uxtext_quote(), but appends
33 /* the conversion result to the result buffer.
34 /*
35 /* uxtext_unquote() performs the opposite transformation. This
36 /* function understands lowercase, uppercase, and mixed case
37 /* \x{XX...} sequences. The result value is the unquoted
38 /* argument in case of success, a null pointer otherwise.
39 /*
40 /* uxtext_unquote_append() is like uxtext_unquote(), but appends
41 /* the conversion result to the result buffer.
42 /* BUGS
43 /* This module cannot process null characters in data.
44 /* LICENSE
45 /* .ad
46 /* .fi
47 /* The Secure Mailer license must be distributed with this software.
48 /* AUTHOR(S)
49 /* Arnt Gulbrandsen
50 /*
51 /* Wietse Venema
52 /* IBM T.J. Watson Research
53 /* P.O. Box 704
54 /* Yorktown Heights, NY 10598, USA
55 /*
56 /* Wietse Venema
57 /* Google, Inc.
58 /* 111 8th Avenue
59 /* New York, NY 10011, USA
60 /*--*/
61 
62 /* System library. */
63 
64 #include <sys_defs.h>
65 #include <string.h>
66 #include <ctype.h>
67 
68 /* Utility library. */
69 
70 #include "msg.h"
71 #include "vstring.h"
72 #include "uxtext.h"
73 
74 /* Application-specific. */
75 
76 #define STR(x) vstring_str(x)
77 #define LEN(x) VSTRING_LEN(x)
78 
79 /* uxtext_quote_append - append unquoted data to quoted data */
80 
81 VSTRING *uxtext_quote_append(VSTRING *quoted, const char *unquoted,
82  const char *special)
83 {
84  unsigned const char *cp;
85  int ch;
86 
87  for (cp = (unsigned const char *) unquoted; (ch = *cp) != 0; cp++) {
88  /* Fix 20140709: the '\' character must always be quoted. */
89  if (ch != '\\' && ch > 32 && ch < 127
90  && (*special == 0 || strchr(special, ch) == 0)) {
91  VSTRING_ADDCH(quoted, ch);
92  } else {
93 
94  /*
95  * had RFC6533 been written like 6531 and 6532, this else clause
96  * would be one line long.
97  */
98  int unicode = 0;
99  int pick = 0;
100 
101  if (ch < 0x80) {
102  //0000 0000 - 0000 007 F 0x xxxxxx
103  unicode = ch;
104  } else if ((ch & 0xe0) == 0xc0) {
105  //0000 0080 - 0000 07 FF 110 xxxxx 10 xxxxxx
106  unicode = (ch & 0x1f);
107  pick = 1;
108  } else if ((ch & 0xf0) == 0xe0) {
109  //0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx
110  unicode = (ch & 0x0f);
111  pick = 2;
112  } else if ((ch & 0xf8) == 0xf0) {
113  //0001 0000 - 001 F FFFF 11110 xxx 10 xxxxxx 10 xxxxxx 10 xxxxxx
114  unicode = (ch & 0x07);
115  pick = 3;
116  } else if ((ch & 0xfc) == 0xf8) {
117  //0020 0000 - 03 FF FFFF 111110 xx 10 xxxxxx 10 xxxxxx...10 xxxxxx
118  unicode = (ch & 0x03);
119  pick = 4;
120  } else if ((ch & 0xfe) == 0xfc) {
121  //0400 0000 - 7 FFF FFFF 1111110 x 10 xxxxxx...10 xxxxxx
122  unicode = (ch & 0x01);
123  pick = 5;
124  } else {
125  return (0);
126  }
127  while (pick > 0) {
128  ch = *++cp;
129  if ((ch & 0xc0) != 0x80)
130  return (0);
131  unicode = unicode << 6 | (ch & 0x3f);
132  pick--;
133  }
134  vstring_sprintf_append(quoted, "\\x{%02X}", unicode);
135  }
136  }
137  VSTRING_TERMINATE(quoted);
138  return (quoted);
139 }
140 
141 /* uxtext_quote - unquoted data to quoted */
142 
143 VSTRING *uxtext_quote(VSTRING *quoted, const char *unquoted, const char *special)
144 {
145  VSTRING_RESET(quoted);
146  uxtext_quote_append(quoted, unquoted, special);
147  return (quoted);
148 }
149 
150 /* uxtext_unquote_append - quoted data to unquoted */
151 
152 VSTRING *uxtext_unquote_append(VSTRING *unquoted, const char *quoted)
153 {
154  const unsigned char *cp;
155  int ch;
156 
157  for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++) {
158  if (ch == '\\' && cp[1] == 'x' && cp[2] == '{') {
159  int unicode = 0;
160 
161  cp += 2;
162  while ((ch = *++cp) != '}') {
163  if (ISDIGIT(ch))
164  unicode = (unicode << 4) + (ch - '0');
165  else if (ch >= 'a' && ch <= 'f')
166  unicode = (unicode << 4) + (ch - 'a' + 10);
167  else if (ch >= 'A' && ch <= 'F')
168  unicode = (unicode << 4) + (ch - 'A' + 10);
169  else
170  return (0); /* also covers the null
171  * terminator */
172  if (unicode > 0x10ffff)
173  return (0);
174  }
175 
176  /*
177  * the following block is from
178  * https://github.com/aox/aox/blob/master/encodings/utf.cpp, with
179  * permission by the authors.
180  */
181  if (unicode < 0x80) {
182  VSTRING_ADDCH(unquoted, (char) unicode);
183  } else if (unicode < 0x800) {
184  VSTRING_ADDCH(unquoted, 0xc0 | ((char) (unicode >> 6)));
185  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
186  } else if (unicode < 0x10000) {
187  VSTRING_ADDCH(unquoted, 0xe0 | ((char) (unicode >> 12)));
188  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
189  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
190  } else if (unicode < 0x200000) {
191  VSTRING_ADDCH(unquoted, 0xf0 | ((char) (unicode >> 18)));
192  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f));
193  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
194  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
195  } else if (unicode < 0x4000000) {
196  VSTRING_ADDCH(unquoted, 0xf8 | ((char) (unicode >> 24)));
197  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f));
198  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f));
199  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
200  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
201  } else {
202  VSTRING_ADDCH(unquoted, 0xfc | ((char) (unicode >> 30)));
203  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 24) & 0x3f));
204  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f));
205  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f));
206  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
207  VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
208  }
209  } else {
210  VSTRING_ADDCH(unquoted, ch);
211  }
212  }
213  VSTRING_TERMINATE(unquoted);
214  return (unquoted);
215 }
216 
217 /* uxtext_unquote - quoted data to unquoted */
218 
219 VSTRING *uxtext_unquote(VSTRING *unquoted, const char *quoted)
220 {
221  VSTRING_RESET(unquoted);
222  return (uxtext_unquote_append(unquoted, quoted) ? unquoted : 0);
223 }
224 
225 #ifdef TEST
226 
227  /*
228  * Proof-of-concept test program: convert to quoted and back.
229  */
230 #include <vstream.h>
231 
232 #define BUFLEN 1024
233 
234 static ssize_t read_buf(VSTREAM *fp, VSTRING *buf)
235 {
236  ssize_t len;
237 
238  VSTRING_RESET(buf);
239  len = vstream_fread(fp, STR(buf), vstring_avail(buf));
240  VSTRING_AT_OFFSET(buf, len); /* XXX */
241  VSTRING_TERMINATE(buf);
242  return (len);
243 }
244 
245 int main(int unused_argc, char **unused_argv)
246 {
247  VSTRING *unquoted = vstring_alloc(BUFLEN);
248  VSTRING *quoted = vstring_alloc(100);
249  ssize_t len;
250 
251  /*
252  * Negative tests.
253  */
254  if (uxtext_unquote(unquoted, "\\x{x1}") != 0)
255  msg_warn("undetected error pattern 1");
256  if (uxtext_unquote(unquoted, "\\x{2x}") != 0)
257  msg_warn("undetected error pattern 2");
258  if (uxtext_unquote(unquoted, "\\x{33") != 0)
259  msg_warn("undetected error pattern 3");
260 
261  /*
262  * Positive tests.
263  */
264  while ((len = read_buf(VSTREAM_IN, unquoted)) > 0) {
265  uxtext_quote(quoted, STR(unquoted), "+=");
266  if (uxtext_unquote(unquoted, STR(quoted)) == 0)
267  msg_fatal("bad input: %.100s", STR(quoted));
268  if (LEN(unquoted) != len)
269  msg_fatal("len %ld != unquoted len %ld",
270  (long) len, (long) LEN(unquoted));
271  if (vstream_fwrite(VSTREAM_OUT, STR(unquoted), LEN(unquoted)) != LEN(unquoted))
272  msg_fatal("write error: %m");
273  }
275  vstring_free(unquoted);
276  vstring_free(quoted);
277  return (0);
278 }
279 
280 #endif
VSTRING * uxtext_unquote_append(VSTRING *unquoted, const char *quoted)
Definition: uxtext.c:152
#define VSTREAM_OUT
Definition: vstream.h:67
int main(int argc, char **argv)
Definition: anvil.c:1010
#define STR(x)
Definition: uxtext.c:76
#define VSTREAM_IN
Definition: vstream.h:66
VSTRING * uxtext_quote_append(VSTRING *quoted, const char *unquoted, const char *special)
Definition: uxtext.c:81
#define VSTRING_TERMINATE(vp)
Definition: vstring.h:74
#define LEN(x)
Definition: uxtext.c:77
#define VSTRING_ADDCH(vp, ch)
Definition: vstring.h:81
VSTRING * vstring_sprintf_append(VSTRING *vp, const char *format,...)
Definition: vstring.c:624
#define ISDIGIT(c)
Definition: sys_defs.h:1748
VSTRING * uxtext_quote(VSTRING *quoted, const char *unquoted, const char *special)
Definition: uxtext.c:143
#define VSTRING_RESET(vp)
Definition: vstring.h:77
void msg_warn(const char *fmt,...)
Definition: msg.c:215
VSTRING * vstring_alloc(ssize_t len)
Definition: vstring.c:353
#define vstring_avail(vp)
Definition: vstring.h:86
NORETURN msg_fatal(const char *fmt,...)
Definition: msg.c:249
#define vstream_fread(v, b, n)
Definition: vstream.h:104
int vstream_fflush(VSTREAM *stream)
Definition: vstream.c:1257
#define vstream_fwrite(v, b, n)
Definition: vstream.h:105
VSTRING * vstring_free(VSTRING *vp)
Definition: vstring.c:380
#define VSTRING_AT_OFFSET(vp, offset)
Definition: vstring.h:92
VSTRING * uxtext_unquote(VSTRING *unquoted, const char *quoted)
Definition: uxtext.c:219