Postfix 3.3.1
strcasecmp_utf8.c
[詳解]
1 /*++
2 /* NAME
3 /* strcasecmp_utf8 3
4 /* SUMMARY
5 /* caseless string comparison
6 /* SYNOPSIS
7 /* #include <stringops.h>
8 /*
9 /* int strcasecmp_utf8(
10 /* const char *s1,
11 /* const char *s2)
12 /*
13 /* int strncasecmp_utf8(
14 /* const char *s1,
15 /* const char *s2,
16 /* ssize_t len)
17 /* AUXILIARY FUNCTIONS
18 /* int strcasecmp_utf8x(
19 /* int flags,
20 /* const char *s1,
21 /* const char *s2)
22 /*
23 /* int strncasecmp_utf8x(
24 /* int flags,
25 /* const char *s1,
26 /* const char *s2,
27 /* ssize_t len)
28 /* DESCRIPTION
29 /* strcasecmp_utf8() implements caseless string comparison for
30 /* UTF-8 text, with an API similar to strcasecmp(). Only ASCII
31 /* characters are casefolded when the code is compiled without
32 /* EAI support or when util_utf8_enable is zero.
33 /*
34 /* strncasecmp_utf8() implements caseless string comparison
35 /* for UTF-8 text, with an API similar to strncasecmp(). Only
36 /* ASCII characters are casefolded when the code is compiled
37 /* without EAI support or when util_utf8_enable is zero.
38 /*
39 /* strcasecmp_utf8x() and strncasecmp_utf8x() implement a more
40 /* complex API that provides the above functionality and more.
41 /*
42 /* Arguments:
43 /* .IP "s1, s2"
44 /* Null-terminated strings to be compared.
45 /* .IP len
46 /* String length before casefolding.
47 /* .IP flags
48 /* Zero or CASEF_FLAG_UTF8. The latter flag enables UTF-8 case
49 /* folding instead of folding only ASCII characters. This flag
50 /* is ignored when compiled without EAI support.
51 /* SEE ALSO
52 /* casefold(), casefold text for caseless comparison.
53 /* LICENSE
54 /* .ad
55 /* .fi
56 /* The Secure Mailer license must be distributed with this software.
57 /* AUTHOR(S)
58 /* Wietse Venema
59 /* IBM T.J. Watson Research
60 /* P.O. Box 704
61 /* Yorktown Heights, NY 10598, USA
62 /*
63 /* Wietse Venema
64 /* Google, Inc.
65 /* 111 8th Avenue
66 /* New York, NY 10011, USA
67 /*--*/
68 
69  /*
70  * System library.
71  */
72 #include <sys_defs.h>
73 #include <string.h>
74 
75 #ifdef STRCASECMP_IN_STRINGS_H
76 #include <strings.h>
77 #endif
78 
79  /*
80  * Utility library.
81  */
82 #include <stringops.h>
83 
84 #define STR(x) vstring_str(x)
85 
86 static VSTRING *f1; /* casefold result for s1 */
87 static VSTRING *f2; /* casefold result for s2 */
88 
89 /* strcasecmp_utf8_init - initialize */
90 
91 static void strcasecmp_utf8_init(void)
92 {
93  f1 = vstring_alloc(100);
94  f2 = vstring_alloc(100);
95 }
96 
97 /* strcasecmp_utf8x - caseless string comparison */
98 
99 int strcasecmp_utf8x(int flags, const char *s1, const char *s2)
100 {
101 
102  /*
103  * Short-circuit optimization for ASCII-only text. This may be slower
104  * than using a cache for all results. We must not expose strcasecmp(3)
105  * to non-ASCII text.
106  */
107  if (allascii(s1) && allascii(s2))
108  return (strcasecmp(s1, s2));
109 
110  if (f1 == 0)
111  strcasecmp_utf8_init();
112 
113  /*
114  * Cross our fingers and hope that strcmp() remains agnostic of
115  * charactersets and locales.
116  */
117  flags &= CASEF_FLAG_UTF8;
118  casefoldx(flags, f1, s1, -1);
119  casefoldx(flags, f2, s2, -1);
120  return (strcmp(STR(f1), STR(f2)));
121 }
122 
123 /* strncasecmp_utf8x - caseless string comparison */
124 
125 int strncasecmp_utf8x(int flags, const char *s1, const char *s2,
126  ssize_t len)
127 {
128 
129  /*
130  * Consider using a cache for all results.
131  */
132  if (f1 == 0)
133  strcasecmp_utf8_init();
134 
135  /*
136  * Short-circuit optimization for ASCII-only text. This may be slower
137  * than using a cache for all results. See comments above for limitations
138  * of strcasecmp().
139  */
140  if (allascii_len(s1, len) && allascii_len(s2, len))
141  return (strncasecmp(s1, s2, len));
142 
143  /*
144  * Caution: casefolding may change the number of bytes. See comments
145  * above for concerns about strcmp().
146  */
147  flags &= CASEF_FLAG_UTF8;
148  casefoldx(flags, f1, s1, len);
149  casefoldx(flags, f2, s2, len);
150  return (strcmp(STR(f1), STR(f2)));
151 }
152 
153 #ifdef TEST
154 #include <stdio.h>
155 #include <stdlib.h>
156 #include <vstream.h>
157 #include <vstring_vstream.h>
158 #include <msg_vstream.h>
159 #include <argv.h>
160 
161 int main(int argc, char **argv)
162 {
163  VSTRING *buffer = vstring_alloc(1);
164  ARGV *cmd;
165  char **args;
166  int len;
167  int flags;
168  int res;
169 
170  msg_vstream_init(argv[0], VSTREAM_ERR);
171  flags = CASEF_FLAG_UTF8;
172  util_utf8_enable = 1;
173  while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
174  vstream_printf("> %s\n", STR(buffer));
175  cmd = argv_split(STR(buffer), CHARS_SPACE);
176  if (cmd->argc == 0 || cmd->argv[0][0] == '#')
177  continue;
178  args = cmd->argv;
179 
180  /*
181  * Compare two strings.
182  */
183  if (strcmp(args[0], "compare") == 0 && cmd->argc == 3) {
184  res = strcasecmp_utf8x(flags, args[1], args[2]);
185  vstream_printf("\"%s\" %s \"%s\"\n",
186  args[1],
187  res < 0 ? "<" : res == 0 ? "==" : ">",
188  args[2]);
189  }
190 
191  /*
192  * Compare two substrings.
193  */
194  else if (strcmp(args[0], "compare-len") == 0 && cmd->argc == 4
195  && sscanf(args[3], "%d", &len) == 1 && len >= 0) {
196  res = strncasecmp_utf8x(flags, args[1], args[2], len);
197  vstream_printf("\"%.*s\" %s \"%.*s\"\n",
198  len, args[1],
199  res < 0 ? "<" : res == 0 ? "==" : ">",
200  len, args[2]);
201  }
202 
203  /*
204  * Usage.
205  */
206  else {
207  vstream_printf("Usage: %s compare <s1> <s2> | compare-len <s1> <s2> <len>\n",
208  argv[0]);
209  }
211  argv_free(cmd);
212  }
213  exit(0);
214 }
215 
216 #endif /* TEST */
#define vstring_fgets_nonl(s, p)
ARGV * argv_free(ARGV *argvp)
Definition: argv.c:136
Definition: argv.h:17
#define VSTREAM_OUT
Definition: vstream.h:67
int main(int argc, char **argv)
Definition: anvil.c:1010
char ** argv
Definition: argv.h:20
#define VSTREAM_IN
Definition: vstream.h:66
int strncasecmp(const char *s1, const char *s2, size_t n)
Definition: strcasecmp.c:52
int strcasecmp_utf8x(int flags, const char *s1, const char *s2)
char * casefoldx(int flags, VSTRING *dest, const char *src, ssize_t len)
Definition: casefold.c:114
int allascii_len(const char *string, ssize_t len)
Definition: allascii.c:51
VSTREAM * vstream_printf(const char *fmt,...)
Definition: vstream.c:1335
VSTRING * vstring_alloc(ssize_t len)
Definition: vstring.c:353
#define CHARS_SPACE
Definition: sys_defs.h:1762
#define allascii(s)
Definition: stringops.h:66
int vstream_fflush(VSTREAM *stream)
Definition: vstream.c:1257
ARGV * argv_split(const char *, const char *)
Definition: argv_split.c:63
int strcasecmp(const char *s1, const char *s2)
Definition: strcasecmp.c:41
void msg_vstream_init(const char *name, VSTREAM *vp)
Definition: msg_vstream.c:77
ssize_t argc
Definition: argv.h:19
int util_utf8_enable
Definition: printable.c:47
#define CASEF_FLAG_UTF8
Definition: stringops.h:60
int strncasecmp_utf8x(int flags, const char *s1, const char *s2, ssize_t len)
#define VSTREAM_ERR
Definition: vstream.h:68
#define STR(x)