]> granicus.if.org Git - postgresql/blob - src/common/saslprep.c
Post-PG 10 beta1 pgindent run
[postgresql] / src / common / saslprep.c
1 /*-------------------------------------------------------------------------
2  * saslprep.c
3  *              SASLprep normalization, for SCRAM authentication
4  *
5  * The SASLprep algorithm is used to process a user-supplied password into
6  * canonical form.  For more details, see:
7  *
8  * [RFC3454] Preparation of Internationalized Strings ("stringprep"),
9  *        http://www.ietf.org/rfc/rfc3454.txt
10  *
11  * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords
12  *        http://www.ietf.org/rfc/rfc4013.txt
13  *
14  *
15  * Portions Copyright (c) 2017, PostgreSQL Global Development Group
16  *
17  * IDENTIFICATION
18  *        src/common/saslprep.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 #ifndef FRONTEND
23 #include "postgres.h"
24 #else
25 #include "postgres_fe.h"
26 #endif
27
28 #include "common/saslprep.h"
29 #include "common/unicode_norm.h"
30
31 /*
32  * Note: The functions in this file depend on functions from
33  * src/backend/utils/mb/wchar.c, so in order to use this in frontend
34  * code, you will need to link that in, too.
35  */
36 #include "mb/pg_wchar.h"
37
38 /*
39  * Limit on how large passwords we will try to process.  A password
40  * larger than this will be treated the same as out-of-memory.
41  */
42 #define MAX_PASSWORD_LENGTH             1024
43
/*
 * Memory management shims.
 *
 * Frontend builds use the plain C library allocator; there an allocation
 * failure yields a NULL pointer, which callers turn into SASLPREP_OOM.
 * Backend builds go through pstrdup/palloc/pfree instead.
 */
#ifdef FRONTEND
#define STRDUP(str) strdup(str)
#define ALLOC(nbytes) malloc(nbytes)
#define FREE(ptr) free(ptr)
#else
#define STRDUP(str) pstrdup(str)
#define ALLOC(nbytes) palloc(nbytes)
#define FREE(ptr) pfree(ptr)
#endif
57
58 /* Prototypes for local functions */
59 static int      codepoint_range_cmp(const void *a, const void *b);
60 static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
61 static int      pg_utf8_string_len(const char *source);
62 static bool pg_is_ascii_string(const char *p);
63
64 /*
65  * Stringprep Mapping Tables.
66  *
67  * The stringprep specification includes a number of tables of Unicode
68  * codepoints, used in different parts of the algorithm.  They are below,
69  * as arrays of codepoint ranges.  Each range is a pair of codepoints,
70  * for the first and last codepoint included in the range (inclusive!).
71  */
72
/*
 * C.1.2 Non-ASCII space characters
 *
 * These are all mapped to the ASCII space character (U+0020).
 *
 * Each pair of entries is an inclusive [first, last] codepoint range.
 * Note that U+200B is covered both here and by
 * commonly_mapped_to_nothing_ranges; which mapping applies depends on the
 * order in which the caller consults the two tables.
 */
static const pg_wchar non_ascii_space_ranges[] =
{
        0x00A0, 0x00A0,
        0x1680, 0x1680,
        0x2000, 0x200B,
        0x202F, 0x202F,
        0x205F, 0x205F,
        0x3000, 0x3000
};
87
/*
 * B.1 Commonly mapped to nothing
 *
 * If any of these appear in the input, they are removed.
 *
 * Each pair of entries is an inclusive [first, last] codepoint range.
 * Tables searched through is_code_in_table() are binary-searched, so keep
 * the ranges in ascending, non-overlapping order.
 */
static const pg_wchar commonly_mapped_to_nothing_ranges[] =
{
        0x00AD, 0x00AD,
        0x034F, 0x034F,
        0x1806, 0x1806,
        0x180B, 0x180D,
        0x200B, 0x200D,
        0x2060, 0x2060,
        0xFE00, 0xFE0F,
        0xFEFF, 0xFEFF
};
104
/*
 * prohibited_output_ranges is a union of all the characters from
 * the following tables:
 *
 * C.1.2 Non-ASCII space characters
 * C.2.1 ASCII control characters
 * C.2.2 Non-ASCII control characters
 * C.3 Private Use characters
 * C.4 Non-character code points
 * C.5 Surrogate code points
 * C.6 Inappropriate for plain text characters
 * C.7 Inappropriate for canonical representation characters
 * C.8 Change display properties or deprecated characters
 * C.9 Tagging characters
 *
 * These are the tables that are listed as "prohibited output"
 * characters in the SASLprep profile.
 *
 * The comment after each code range indicates which source table
 * the code came from.  Note that there is some overlap in the source
 * tables, so one code might originate from multiple source tables.
 * Adjacent ranges have also been merged together, to save space.
 *
 * Each pair of entries is an inclusive [first, last] codepoint range.
 * Tables searched through is_code_in_table() are binary-searched, so keep
 * the ranges in ascending, non-overlapping order when editing.
 */
static const pg_wchar prohibited_output_ranges[] =
{
        0x0000, 0x001F,                         /* C.2.1 */
        0x007F, 0x00A0,                         /* C.1.2, C.2.1, C.2.2 */
        0x0340, 0x0341,                         /* C.8 */
        0x06DD, 0x06DD,                         /* C.2.2 */
        0x070F, 0x070F,                         /* C.2.2 */
        0x1680, 0x1680,                         /* C.1.2 */
        0x180E, 0x180E,                         /* C.2.2 */
        0x2000, 0x200F,                         /* C.1.2, C.2.2, C.8 */
        0x2028, 0x202F,                         /* C.1.2, C.2.2, C.8 */
        0x205F, 0x2063,                         /* C.1.2, C.2.2 */
        0x206A, 0x206F,                         /* C.2.2, C.8 */
        0x2FF0, 0x2FFB,                         /* C.7 */
        0x3000, 0x3000,                         /* C.1.2 */
        0xD800, 0xF8FF,                         /* C.3, C.5 */
        0xFDD0, 0xFDEF,                         /* C.4 */
        0xFEFF, 0xFEFF,                         /* C.2.2 */
        0xFFF9, 0xFFFF,                         /* C.2.2, C.4, C.6 */
        0x1D173, 0x1D17A,                       /* C.2.2 */
        0x1FFFE, 0x1FFFF,                       /* C.4 */
        0x2FFFE, 0x2FFFF,                       /* C.4 */
        0x3FFFE, 0x3FFFF,                       /* C.4 */
        0x4FFFE, 0x4FFFF,                       /* C.4 */
        0x5FFFE, 0x5FFFF,                       /* C.4 */
        0x6FFFE, 0x6FFFF,                       /* C.4 */
        0x7FFFE, 0x7FFFF,                       /* C.4 */
        0x8FFFE, 0x8FFFF,                       /* C.4 */
        0x9FFFE, 0x9FFFF,                       /* C.4 */
        0xAFFFE, 0xAFFFF,                       /* C.4 */
        0xBFFFE, 0xBFFFF,                       /* C.4 */
        0xCFFFE, 0xCFFFF,                       /* C.4 */
        0xDFFFE, 0xDFFFF,                       /* C.4 */
        0xE0001, 0xE0001,                       /* C.9 */
        0xE0020, 0xE007F,                       /* C.9 */
        0xEFFFE, 0xEFFFF,                       /* C.4 */
        0xF0000, 0xFFFFF,                       /* C.3, C.4 */
        0x100000, 0x10FFFF                      /* C.3, C.4 */
};
167
/*
 * A.1 Unassigned code points in Unicode 3.2
 *
 * Each pair of entries is an inclusive [first, last] codepoint range.
 * Tables searched through is_code_in_table() are binary-searched, so keep
 * the ranges in ascending, non-overlapping order when editing.
 */
static const pg_wchar unassigned_codepoint_ranges[] =
{
        0x0221, 0x0221,
        0x0234, 0x024F,
        0x02AE, 0x02AF,
        0x02EF, 0x02FF,
        0x0350, 0x035F,
        0x0370, 0x0373,
        0x0376, 0x0379,
        0x037B, 0x037D,
        0x037F, 0x0383,
        0x038B, 0x038B,
        0x038D, 0x038D,
        0x03A2, 0x03A2,
        0x03CF, 0x03CF,
        0x03F7, 0x03FF,
        0x0487, 0x0487,
        0x04CF, 0x04CF,
        0x04F6, 0x04F7,
        0x04FA, 0x04FF,
        0x0510, 0x0530,
        0x0557, 0x0558,
        0x0560, 0x0560,
        0x0588, 0x0588,
        0x058B, 0x0590,
        0x05A2, 0x05A2,
        0x05BA, 0x05BA,
        0x05C5, 0x05CF,
        0x05EB, 0x05EF,
        0x05F5, 0x060B,
        0x060D, 0x061A,
        0x061C, 0x061E,
        0x0620, 0x0620,
        0x063B, 0x063F,
        0x0656, 0x065F,
        0x06EE, 0x06EF,
        0x06FF, 0x06FF,
        0x070E, 0x070E,
        0x072D, 0x072F,
        0x074B, 0x077F,
        0x07B2, 0x0900,
        0x0904, 0x0904,
        0x093A, 0x093B,
        0x094E, 0x094F,
        0x0955, 0x0957,
        0x0971, 0x0980,
        0x0984, 0x0984,
        0x098D, 0x098E,
        0x0991, 0x0992,
        0x09A9, 0x09A9,
        0x09B1, 0x09B1,
        0x09B3, 0x09B5,
        0x09BA, 0x09BB,
        0x09BD, 0x09BD,
        0x09C5, 0x09C6,
        0x09C9, 0x09CA,
        0x09CE, 0x09D6,
        0x09D8, 0x09DB,
        0x09DE, 0x09DE,
        0x09E4, 0x09E5,
        0x09FB, 0x0A01,
        0x0A03, 0x0A04,
        0x0A0B, 0x0A0E,
        0x0A11, 0x0A12,
        0x0A29, 0x0A29,
        0x0A31, 0x0A31,
        0x0A34, 0x0A34,
        0x0A37, 0x0A37,
        0x0A3A, 0x0A3B,
        0x0A3D, 0x0A3D,
        0x0A43, 0x0A46,
        0x0A49, 0x0A4A,
        0x0A4E, 0x0A58,
        0x0A5D, 0x0A5D,
        0x0A5F, 0x0A65,
        0x0A75, 0x0A80,
        0x0A84, 0x0A84,
        0x0A8C, 0x0A8C,
        0x0A8E, 0x0A8E,
        0x0A92, 0x0A92,
        0x0AA9, 0x0AA9,
        0x0AB1, 0x0AB1,
        0x0AB4, 0x0AB4,
        0x0ABA, 0x0ABB,
        0x0AC6, 0x0AC6,
        0x0ACA, 0x0ACA,
        0x0ACE, 0x0ACF,
        0x0AD1, 0x0ADF,
        0x0AE1, 0x0AE5,
        0x0AF0, 0x0B00,
        0x0B04, 0x0B04,
        0x0B0D, 0x0B0E,
        0x0B11, 0x0B12,
        0x0B29, 0x0B29,
        0x0B31, 0x0B31,
        0x0B34, 0x0B35,
        0x0B3A, 0x0B3B,
        0x0B44, 0x0B46,
        0x0B49, 0x0B4A,
        0x0B4E, 0x0B55,
        0x0B58, 0x0B5B,
        0x0B5E, 0x0B5E,
        0x0B62, 0x0B65,
        0x0B71, 0x0B81,
        0x0B84, 0x0B84,
        0x0B8B, 0x0B8D,
        0x0B91, 0x0B91,
        0x0B96, 0x0B98,
        0x0B9B, 0x0B9B,
        0x0B9D, 0x0B9D,
        0x0BA0, 0x0BA2,
        0x0BA5, 0x0BA7,
        0x0BAB, 0x0BAD,
        0x0BB6, 0x0BB6,
        0x0BBA, 0x0BBD,
        0x0BC3, 0x0BC5,
        0x0BC9, 0x0BC9,
        0x0BCE, 0x0BD6,
        0x0BD8, 0x0BE6,
        0x0BF3, 0x0C00,
        0x0C04, 0x0C04,
        0x0C0D, 0x0C0D,
        0x0C11, 0x0C11,
        0x0C29, 0x0C29,
        0x0C34, 0x0C34,
        0x0C3A, 0x0C3D,
        0x0C45, 0x0C45,
        0x0C49, 0x0C49,
        0x0C4E, 0x0C54,
        0x0C57, 0x0C5F,
        0x0C62, 0x0C65,
        0x0C70, 0x0C81,
        0x0C84, 0x0C84,
        0x0C8D, 0x0C8D,
        0x0C91, 0x0C91,
        0x0CA9, 0x0CA9,
        0x0CB4, 0x0CB4,
        0x0CBA, 0x0CBD,
        0x0CC5, 0x0CC5,
        0x0CC9, 0x0CC9,
        0x0CCE, 0x0CD4,
        0x0CD7, 0x0CDD,
        0x0CDF, 0x0CDF,
        0x0CE2, 0x0CE5,
        0x0CF0, 0x0D01,
        0x0D04, 0x0D04,
        0x0D0D, 0x0D0D,
        0x0D11, 0x0D11,
        0x0D29, 0x0D29,
        0x0D3A, 0x0D3D,
        0x0D44, 0x0D45,
        0x0D49, 0x0D49,
        0x0D4E, 0x0D56,
        0x0D58, 0x0D5F,
        0x0D62, 0x0D65,
        0x0D70, 0x0D81,
        0x0D84, 0x0D84,
        0x0D97, 0x0D99,
        0x0DB2, 0x0DB2,
        0x0DBC, 0x0DBC,
        0x0DBE, 0x0DBF,
        0x0DC7, 0x0DC9,
        0x0DCB, 0x0DCE,
        0x0DD5, 0x0DD5,
        0x0DD7, 0x0DD7,
        0x0DE0, 0x0DF1,
        0x0DF5, 0x0E00,
        0x0E3B, 0x0E3E,
        0x0E5C, 0x0E80,
        0x0E83, 0x0E83,
        0x0E85, 0x0E86,
        0x0E89, 0x0E89,
        0x0E8B, 0x0E8C,
        0x0E8E, 0x0E93,
        0x0E98, 0x0E98,
        0x0EA0, 0x0EA0,
        0x0EA4, 0x0EA4,
        0x0EA6, 0x0EA6,
        0x0EA8, 0x0EA9,
        0x0EAC, 0x0EAC,
        0x0EBA, 0x0EBA,
        0x0EBE, 0x0EBF,
        0x0EC5, 0x0EC5,
        0x0EC7, 0x0EC7,
        0x0ECE, 0x0ECF,
        0x0EDA, 0x0EDB,
        0x0EDE, 0x0EFF,
        0x0F48, 0x0F48,
        0x0F6B, 0x0F70,
        0x0F8C, 0x0F8F,
        0x0F98, 0x0F98,
        0x0FBD, 0x0FBD,
        0x0FCD, 0x0FCE,
        0x0FD0, 0x0FFF,
        0x1022, 0x1022,
        0x1028, 0x1028,
        0x102B, 0x102B,
        0x1033, 0x1035,
        0x103A, 0x103F,
        0x105A, 0x109F,
        0x10C6, 0x10CF,
        0x10F9, 0x10FA,
        0x10FC, 0x10FF,
        0x115A, 0x115E,
        0x11A3, 0x11A7,
        0x11FA, 0x11FF,
        0x1207, 0x1207,
        0x1247, 0x1247,
        0x1249, 0x1249,
        0x124E, 0x124F,
        0x1257, 0x1257,
        0x1259, 0x1259,
        0x125E, 0x125F,
        0x1287, 0x1287,
        0x1289, 0x1289,
        0x128E, 0x128F,
        0x12AF, 0x12AF,
        0x12B1, 0x12B1,
        0x12B6, 0x12B7,
        0x12BF, 0x12BF,
        0x12C1, 0x12C1,
        0x12C6, 0x12C7,
        0x12CF, 0x12CF,
        0x12D7, 0x12D7,
        0x12EF, 0x12EF,
        0x130F, 0x130F,
        0x1311, 0x1311,
        0x1316, 0x1317,
        0x131F, 0x131F,
        0x1347, 0x1347,
        0x135B, 0x1360,
        0x137D, 0x139F,
        0x13F5, 0x1400,
        0x1677, 0x167F,
        0x169D, 0x169F,
        0x16F1, 0x16FF,
        0x170D, 0x170D,
        0x1715, 0x171F,
        0x1737, 0x173F,
        0x1754, 0x175F,
        0x176D, 0x176D,
        0x1771, 0x1771,
        0x1774, 0x177F,
        0x17DD, 0x17DF,
        0x17EA, 0x17FF,
        0x180F, 0x180F,
        0x181A, 0x181F,
        0x1878, 0x187F,
        0x18AA, 0x1DFF,
        0x1E9C, 0x1E9F,
        0x1EFA, 0x1EFF,
        0x1F16, 0x1F17,
        0x1F1E, 0x1F1F,
        0x1F46, 0x1F47,
        0x1F4E, 0x1F4F,
        0x1F58, 0x1F58,
        0x1F5A, 0x1F5A,
        0x1F5C, 0x1F5C,
        0x1F5E, 0x1F5E,
        0x1F7E, 0x1F7F,
        0x1FB5, 0x1FB5,
        0x1FC5, 0x1FC5,
        0x1FD4, 0x1FD5,
        0x1FDC, 0x1FDC,
        0x1FF0, 0x1FF1,
        0x1FF5, 0x1FF5,
        0x1FFF, 0x1FFF,
        0x2053, 0x2056,
        0x2058, 0x205E,
        0x2064, 0x2069,
        0x2072, 0x2073,
        0x208F, 0x209F,
        0x20B2, 0x20CF,
        0x20EB, 0x20FF,
        0x213B, 0x213C,
        0x214C, 0x2152,
        0x2184, 0x218F,
        0x23CF, 0x23FF,
        0x2427, 0x243F,
        0x244B, 0x245F,
        0x24FF, 0x24FF,
        0x2614, 0x2615,
        0x2618, 0x2618,
        0x267E, 0x267F,
        0x268A, 0x2700,
        0x2705, 0x2705,
        0x270A, 0x270B,
        0x2728, 0x2728,
        0x274C, 0x274C,
        0x274E, 0x274E,
        0x2753, 0x2755,
        0x2757, 0x2757,
        0x275F, 0x2760,
        0x2795, 0x2797,
        0x27B0, 0x27B0,
        0x27BF, 0x27CF,
        0x27EC, 0x27EF,
        0x2B00, 0x2E7F,
        0x2E9A, 0x2E9A,
        0x2EF4, 0x2EFF,
        0x2FD6, 0x2FEF,
        0x2FFC, 0x2FFF,
        0x3040, 0x3040,
        0x3097, 0x3098,
        0x3100, 0x3104,
        0x312D, 0x3130,
        0x318F, 0x318F,
        0x31B8, 0x31EF,
        0x321D, 0x321F,
        0x3244, 0x3250,
        0x327C, 0x327E,
        0x32CC, 0x32CF,
        0x32FF, 0x32FF,
        0x3377, 0x337A,
        0x33DE, 0x33DF,
        0x33FF, 0x33FF,
        0x4DB6, 0x4DFF,
        0x9FA6, 0x9FFF,
        0xA48D, 0xA48F,
        0xA4C7, 0xABFF,
        0xD7A4, 0xD7FF,
        0xFA2E, 0xFA2F,
        0xFA6B, 0xFAFF,
        0xFB07, 0xFB12,
        0xFB18, 0xFB1C,
        0xFB37, 0xFB37,
        0xFB3D, 0xFB3D,
        0xFB3F, 0xFB3F,
        0xFB42, 0xFB42,
        0xFB45, 0xFB45,
        0xFBB2, 0xFBD2,
        0xFD40, 0xFD4F,
        0xFD90, 0xFD91,
        0xFDC8, 0xFDCF,
        0xFDFD, 0xFDFF,
        0xFE10, 0xFE1F,
        0xFE24, 0xFE2F,
        0xFE47, 0xFE48,
        0xFE53, 0xFE53,
        0xFE67, 0xFE67,
        0xFE6C, 0xFE6F,
        0xFE75, 0xFE75,
        0xFEFD, 0xFEFE,
        0xFF00, 0xFF00,
        0xFFBF, 0xFFC1,
        0xFFC8, 0xFFC9,
        0xFFD0, 0xFFD1,
        0xFFD8, 0xFFD9,
        0xFFDD, 0xFFDF,
        0xFFE7, 0xFFE7,
        0xFFEF, 0xFFF8,
        0x10000, 0x102FF,
        0x1031F, 0x1031F,
        0x10324, 0x1032F,
        0x1034B, 0x103FF,
        0x10426, 0x10427,
        0x1044E, 0x1CFFF,
        0x1D0F6, 0x1D0FF,
        0x1D127, 0x1D129,
        0x1D1DE, 0x1D3FF,
        0x1D455, 0x1D455,
        0x1D49D, 0x1D49D,
        0x1D4A0, 0x1D4A1,
        0x1D4A3, 0x1D4A4,
        0x1D4A7, 0x1D4A8,
        0x1D4AD, 0x1D4AD,
        0x1D4BA, 0x1D4BA,
        0x1D4BC, 0x1D4BC,
        0x1D4C1, 0x1D4C1,
        0x1D4C4, 0x1D4C4,
        0x1D506, 0x1D506,
        0x1D50B, 0x1D50C,
        0x1D515, 0x1D515,
        0x1D51D, 0x1D51D,
        0x1D53A, 0x1D53A,
        0x1D53F, 0x1D53F,
        0x1D545, 0x1D545,
        0x1D547, 0x1D549,
        0x1D551, 0x1D551,
        0x1D6A4, 0x1D6A7,
        0x1D7CA, 0x1D7CD,
        0x1D800, 0x1FFFD,
        0x2A6D7, 0x2F7FF,
        0x2FA1E, 0x2FFFD,
        0x30000, 0x3FFFD,
        0x40000, 0x4FFFD,
        0x50000, 0x5FFFD,
        0x60000, 0x6FFFD,
        0x70000, 0x7FFFD,
        0x80000, 0x8FFFD,
        0x90000, 0x9FFFD,
        0xA0000, 0xAFFFD,
        0xB0000, 0xBFFFD,
        0xC0000, 0xCFFFD,
        0xD0000, 0xDFFFD,
        0xE0000, 0xE0000,
        0xE0002, 0xE001F,
        0xE0080, 0xEFFFD
};
568
/*
 * D.1 Characters with bidirectional property "R" or "AL"
 *
 * Each pair of entries is an inclusive [first, last] codepoint range.
 * Tables searched through is_code_in_table() are binary-searched, so keep
 * the ranges in ascending, non-overlapping order when editing.
 */
static const pg_wchar RandALCat_codepoint_ranges[] =
{
        0x05BE, 0x05BE,
        0x05C0, 0x05C0,
        0x05C3, 0x05C3,
        0x05D0, 0x05EA,
        0x05F0, 0x05F4,
        0x061B, 0x061B,
        0x061F, 0x061F,
        0x0621, 0x063A,
        0x0640, 0x064A,
        0x066D, 0x066F,
        0x0671, 0x06D5,
        0x06DD, 0x06DD,
        0x06E5, 0x06E6,
        0x06FA, 0x06FE,
        0x0700, 0x070D,
        0x0710, 0x0710,
        0x0712, 0x072C,
        0x0780, 0x07A5,
        0x07B1, 0x07B1,
        0x200F, 0x200F,
        0xFB1D, 0xFB1D,
        0xFB1F, 0xFB28,
        0xFB2A, 0xFB36,
        0xFB38, 0xFB3C,
        0xFB3E, 0xFB3E,
        0xFB40, 0xFB41,
        0xFB43, 0xFB44,
        0xFB46, 0xFBB1,
        0xFBD3, 0xFD3D,
        0xFD50, 0xFD8F,
        0xFD92, 0xFDC7,
        0xFDF0, 0xFDFC,
        0xFE70, 0xFE74,
        0xFE76, 0xFEFC
};
607
/*
 * D.2 Characters with bidirectional property "L"
 *
 * Each pair of entries is an inclusive [first, last] codepoint range.
 * Tables searched through is_code_in_table() are binary-searched, so keep
 * the ranges in ascending, non-overlapping order when editing.
 */
static const pg_wchar LCat_codepoint_ranges[] =
{
        0x0041, 0x005A,
        0x0061, 0x007A,
        0x00AA, 0x00AA,
        0x00B5, 0x00B5,
        0x00BA, 0x00BA,
        0x00C0, 0x00D6,
        0x00D8, 0x00F6,
        0x00F8, 0x0220,
        0x0222, 0x0233,
        0x0250, 0x02AD,
        0x02B0, 0x02B8,
        0x02BB, 0x02C1,
        0x02D0, 0x02D1,
        0x02E0, 0x02E4,
        0x02EE, 0x02EE,
        0x037A, 0x037A,
        0x0386, 0x0386,
        0x0388, 0x038A,
        0x038C, 0x038C,
        0x038E, 0x03A1,
        0x03A3, 0x03CE,
        0x03D0, 0x03F5,
        0x0400, 0x0482,
        0x048A, 0x04CE,
        0x04D0, 0x04F5,
        0x04F8, 0x04F9,
        0x0500, 0x050F,
        0x0531, 0x0556,
        0x0559, 0x055F,
        0x0561, 0x0587,
        0x0589, 0x0589,
        0x0903, 0x0903,
        0x0905, 0x0939,
        0x093D, 0x0940,
        0x0949, 0x094C,
        0x0950, 0x0950,
        0x0958, 0x0961,
        0x0964, 0x0970,
        0x0982, 0x0983,
        0x0985, 0x098C,
        0x098F, 0x0990,
        0x0993, 0x09A8,
        0x09AA, 0x09B0,
        0x09B2, 0x09B2,
        0x09B6, 0x09B9,
        0x09BE, 0x09C0,
        0x09C7, 0x09C8,
        0x09CB, 0x09CC,
        0x09D7, 0x09D7,
        0x09DC, 0x09DD,
        0x09DF, 0x09E1,
        0x09E6, 0x09F1,
        0x09F4, 0x09FA,
        0x0A05, 0x0A0A,
        0x0A0F, 0x0A10,
        0x0A13, 0x0A28,
        0x0A2A, 0x0A30,
        0x0A32, 0x0A33,
        0x0A35, 0x0A36,
        0x0A38, 0x0A39,
        0x0A3E, 0x0A40,
        0x0A59, 0x0A5C,
        0x0A5E, 0x0A5E,
        0x0A66, 0x0A6F,
        0x0A72, 0x0A74,
        0x0A83, 0x0A83,
        0x0A85, 0x0A8B,
        0x0A8D, 0x0A8D,
        0x0A8F, 0x0A91,
        0x0A93, 0x0AA8,
        0x0AAA, 0x0AB0,
        0x0AB2, 0x0AB3,
        0x0AB5, 0x0AB9,
        0x0ABD, 0x0AC0,
        0x0AC9, 0x0AC9,
        0x0ACB, 0x0ACC,
        0x0AD0, 0x0AD0,
        0x0AE0, 0x0AE0,
        0x0AE6, 0x0AEF,
        0x0B02, 0x0B03,
        0x0B05, 0x0B0C,
        0x0B0F, 0x0B10,
        0x0B13, 0x0B28,
        0x0B2A, 0x0B30,
        0x0B32, 0x0B33,
        0x0B36, 0x0B39,
        0x0B3D, 0x0B3E,
        0x0B40, 0x0B40,
        0x0B47, 0x0B48,
        0x0B4B, 0x0B4C,
        0x0B57, 0x0B57,
        0x0B5C, 0x0B5D,
        0x0B5F, 0x0B61,
        0x0B66, 0x0B70,
        0x0B83, 0x0B83,
        0x0B85, 0x0B8A,
        0x0B8E, 0x0B90,
        0x0B92, 0x0B95,
        0x0B99, 0x0B9A,
        0x0B9C, 0x0B9C,
        0x0B9E, 0x0B9F,
        0x0BA3, 0x0BA4,
        0x0BA8, 0x0BAA,
        0x0BAE, 0x0BB5,
        0x0BB7, 0x0BB9,
        0x0BBE, 0x0BBF,
        0x0BC1, 0x0BC2,
        0x0BC6, 0x0BC8,
        0x0BCA, 0x0BCC,
        0x0BD7, 0x0BD7,
        0x0BE7, 0x0BF2,
        0x0C01, 0x0C03,
        0x0C05, 0x0C0C,
        0x0C0E, 0x0C10,
        0x0C12, 0x0C28,
        0x0C2A, 0x0C33,
        0x0C35, 0x0C39,
        0x0C41, 0x0C44,
        0x0C60, 0x0C61,
        0x0C66, 0x0C6F,
        0x0C82, 0x0C83,
        0x0C85, 0x0C8C,
        0x0C8E, 0x0C90,
        0x0C92, 0x0CA8,
        0x0CAA, 0x0CB3,
        0x0CB5, 0x0CB9,
        0x0CBE, 0x0CBE,
        0x0CC0, 0x0CC4,
        0x0CC7, 0x0CC8,
        0x0CCA, 0x0CCB,
        0x0CD5, 0x0CD6,
        0x0CDE, 0x0CDE,
        0x0CE0, 0x0CE1,
        0x0CE6, 0x0CEF,
        0x0D02, 0x0D03,
        0x0D05, 0x0D0C,
        0x0D0E, 0x0D10,
        0x0D12, 0x0D28,
        0x0D2A, 0x0D39,
        0x0D3E, 0x0D40,
        0x0D46, 0x0D48,
        0x0D4A, 0x0D4C,
        0x0D57, 0x0D57,
        0x0D60, 0x0D61,
        0x0D66, 0x0D6F,
        0x0D82, 0x0D83,
        0x0D85, 0x0D96,
        0x0D9A, 0x0DB1,
        0x0DB3, 0x0DBB,
        0x0DBD, 0x0DBD,
        0x0DC0, 0x0DC6,
        0x0DCF, 0x0DD1,
        0x0DD8, 0x0DDF,
        0x0DF2, 0x0DF4,
        0x0E01, 0x0E30,
        0x0E32, 0x0E33,
        0x0E40, 0x0E46,
        0x0E4F, 0x0E5B,
        0x0E81, 0x0E82,
        0x0E84, 0x0E84,
        0x0E87, 0x0E88,
        0x0E8A, 0x0E8A,
        0x0E8D, 0x0E8D,
        0x0E94, 0x0E97,
        0x0E99, 0x0E9F,
        0x0EA1, 0x0EA3,
        0x0EA5, 0x0EA5,
        0x0EA7, 0x0EA7,
        0x0EAA, 0x0EAB,
        0x0EAD, 0x0EB0,
        0x0EB2, 0x0EB3,
        0x0EBD, 0x0EBD,
        0x0EC0, 0x0EC4,
        0x0EC6, 0x0EC6,
        0x0ED0, 0x0ED9,
        0x0EDC, 0x0EDD,
        0x0F00, 0x0F17,
        0x0F1A, 0x0F34,
        0x0F36, 0x0F36,
        0x0F38, 0x0F38,
        0x0F3E, 0x0F47,
        0x0F49, 0x0F6A,
        0x0F7F, 0x0F7F,
        0x0F85, 0x0F85,
        0x0F88, 0x0F8B,
        0x0FBE, 0x0FC5,
        0x0FC7, 0x0FCC,
        0x0FCF, 0x0FCF,
        0x1000, 0x1021,
        0x1023, 0x1027,
        0x1029, 0x102A,
        0x102C, 0x102C,
        0x1031, 0x1031,
        0x1038, 0x1038,
        0x1040, 0x1057,
        0x10A0, 0x10C5,
        0x10D0, 0x10F8,
        0x10FB, 0x10FB,
        0x1100, 0x1159,
        0x115F, 0x11A2,
        0x11A8, 0x11F9,
        0x1200, 0x1206,
        0x1208, 0x1246,
        0x1248, 0x1248,
        0x124A, 0x124D,
        0x1250, 0x1256,
        0x1258, 0x1258,
        0x125A, 0x125D,
        0x1260, 0x1286,
        0x1288, 0x1288,
        0x128A, 0x128D,
        0x1290, 0x12AE,
        0x12B0, 0x12B0,
        0x12B2, 0x12B5,
        0x12B8, 0x12BE,
        0x12C0, 0x12C0,
        0x12C2, 0x12C5,
        0x12C8, 0x12CE,
        0x12D0, 0x12D6,
        0x12D8, 0x12EE,
        0x12F0, 0x130E,
        0x1310, 0x1310,
        0x1312, 0x1315,
        0x1318, 0x131E,
        0x1320, 0x1346,
        0x1348, 0x135A,
        0x1361, 0x137C,
        0x13A0, 0x13F4,
        0x1401, 0x1676,
        0x1681, 0x169A,
        0x16A0, 0x16F0,
        0x1700, 0x170C,
        0x170E, 0x1711,
        0x1720, 0x1731,
        0x1735, 0x1736,
        0x1740, 0x1751,
        0x1760, 0x176C,
        0x176E, 0x1770,
        0x1780, 0x17B6,
        0x17BE, 0x17C5,
        0x17C7, 0x17C8,
        0x17D4, 0x17DA,
        0x17DC, 0x17DC,
        0x17E0, 0x17E9,
        0x1810, 0x1819,
        0x1820, 0x1877,
        0x1880, 0x18A8,
        0x1E00, 0x1E9B,
        0x1EA0, 0x1EF9,
        0x1F00, 0x1F15,
        0x1F18, 0x1F1D,
        0x1F20, 0x1F45,
        0x1F48, 0x1F4D,
        0x1F50, 0x1F57,
        0x1F59, 0x1F59,
        0x1F5B, 0x1F5B,
        0x1F5D, 0x1F5D,
        0x1F5F, 0x1F7D,
        0x1F80, 0x1FB4,
        0x1FB6, 0x1FBC,
        0x1FBE, 0x1FBE,
        0x1FC2, 0x1FC4,
        0x1FC6, 0x1FCC,
        0x1FD0, 0x1FD3,
        0x1FD6, 0x1FDB,
        0x1FE0, 0x1FEC,
        0x1FF2, 0x1FF4,
        0x1FF6, 0x1FFC,
        0x200E, 0x200E,
        0x2071, 0x2071,
        0x207F, 0x207F,
        0x2102, 0x2102,
        0x2107, 0x2107,
        0x210A, 0x2113,
        0x2115, 0x2115,
        0x2119, 0x211D,
        0x2124, 0x2124,
        0x2126, 0x2126,
        0x2128, 0x2128,
        0x212A, 0x212D,
        0x212F, 0x2131,
        0x2133, 0x2139,
        0x213D, 0x213F,
        0x2145, 0x2149,
        0x2160, 0x2183,
        0x2336, 0x237A,
        0x2395, 0x2395,
        0x249C, 0x24E9,
        0x3005, 0x3007,
        0x3021, 0x3029,
        0x3031, 0x3035,
        0x3038, 0x303C,
        0x3041, 0x3096,
        0x309D, 0x309F,
        0x30A1, 0x30FA,
        0x30FC, 0x30FF,
        0x3105, 0x312C,
        0x3131, 0x318E,
        0x3190, 0x31B7,
        0x31F0, 0x321C,
        0x3220, 0x3243,
        0x3260, 0x327B,
        0x327F, 0x32B0,
        0x32C0, 0x32CB,
        0x32D0, 0x32FE,
        0x3300, 0x3376,
        0x337B, 0x33DD,
        0x33E0, 0x33FE,
        0x3400, 0x4DB5,
        0x4E00, 0x9FA5,
        0xA000, 0xA48C,
        0xAC00, 0xD7A3,
        0xD800, 0xFA2D,
        0xFA30, 0xFA6A,
        0xFB00, 0xFB06,
        0xFB13, 0xFB17,
        0xFF21, 0xFF3A,
        0xFF41, 0xFF5A,
        0xFF66, 0xFFBE,
        0xFFC2, 0xFFC7,
        0xFFCA, 0xFFCF,
        0xFFD2, 0xFFD7,
        0xFFDA, 0xFFDC,
        0x10300, 0x1031E,
        0x10320, 0x10323,
        0x10330, 0x1034A,
        0x10400, 0x10425,
        0x10428, 0x1044D,
        0x1D000, 0x1D0F5,
        0x1D100, 0x1D126,
        0x1D12A, 0x1D166,
        0x1D16A, 0x1D172,
        0x1D183, 0x1D184,
        0x1D18C, 0x1D1A9,
        0x1D1AE, 0x1D1DD,
        0x1D400, 0x1D454,
        0x1D456, 0x1D49C,
        0x1D49E, 0x1D49F,
        0x1D4A2, 0x1D4A2,
        0x1D4A5, 0x1D4A6,
        0x1D4A9, 0x1D4AC,
        0x1D4AE, 0x1D4B9,
        0x1D4BB, 0x1D4BB,
        0x1D4BD, 0x1D4C0,
        0x1D4C2, 0x1D4C3,
        0x1D4C5, 0x1D505,
        0x1D507, 0x1D50A,
        0x1D50D, 0x1D514,
        0x1D516, 0x1D51C,
        0x1D51E, 0x1D539,
        0x1D53B, 0x1D53E,
        0x1D540, 0x1D544,
        0x1D546, 0x1D546,
        0x1D54A, 0x1D550,
        0x1D552, 0x1D6A3,
        0x1D6A8, 0x1D7C9,
        0x20000, 0x2A6D6,
        0x2F800, 0x2FA1D,
        0xF0000, 0xFFFFD,
        0x100000, 0x10FFFD
};
972
973 /* End of stringprep tables */
974
975
/*
 * Is the given Unicode codepoint in the given table of ranges?
 *
 * Convenience wrapper around is_code_in_table() that supplies the table's
 * element count via lengthof(map).  NOTE(review): lengthof() presumably
 * needs 'map' to be an actual array rather than a pointer -- confirm
 * before passing anything but the static tables defined in this file.
 */
#define IS_CODE_IN_TABLE(code, map) is_code_in_table(code, map, lengthof(map))
978
979 static int
980 codepoint_range_cmp(const void *a, const void *b)
981 {
982         const pg_wchar *key = (const pg_wchar *) a;
983         const pg_wchar *range = (const pg_wchar *) b;
984
985         if (*key < range[0])
986                 return -1;                              /* less than lower bound */
987         if (*key > range[1])
988                 return 1;                               /* greater than upper bound */
989
990         return 0;                                       /* within range */
991 }
992
993 static bool
994 is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
995 {
996         Assert(mapsize % 2 == 0);
997
998         if (code < map[0] || code > map[mapsize - 1])
999                 return false;
1000
1001         if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2,
1002                                 codepoint_range_cmp))
1003                 return true;
1004         else
1005                 return false;
1006 }
1007
/*
 * Calculate the length in characters of a null-terminated UTF-8 string.
 *
 * Returns -1 if the input is not valid UTF-8.
 *
 * The byte length of the string is determined up front, and every
 * character's claimed length is checked against the bytes that remain.
 * Without that check, a multibyte lead byte sitting right before the
 * terminating NUL would make the validity check examine continuation
 * bytes located past the end of the string.
 */
static int
pg_utf8_string_len(const char *source)
{
        const unsigned char *p = (const unsigned char *) source;
        size_t          remaining = strlen(source);
        int                     num_chars = 0;

        while (remaining > 0)
        {
                int                     l = pg_utf_mblen(p);

                /* A sequence that claims more bytes than are left is truncated. */
                if ((size_t) l > remaining || !pg_utf8_islegal(p, l))
                        return -1;

                p += l;
                remaining -= l;
                num_chars++;
        }

        return num_chars;
}
1033
/*
 * Report whether a null-terminated string consists solely of ASCII bytes,
 * i.e. no byte has its high bit set.  An empty string counts as ASCII.
 */
static bool
pg_is_ascii_string(const char *p)
{
        const char *cursor;

        for (cursor = p; *cursor != '\0'; cursor++)
        {
                if (IS_HIGHBIT_SET(*cursor))
                        return false;
        }

        return true;
}
1048
1049
1050 /*
1051  * pg_saslprep - Normalize a password with SASLprep.
1052  *
1053  * SASLprep requires the input to be in UTF-8 encoding, but PostgreSQL
1054  * supports many encodings, so we don't blindly assume that.  pg_saslprep
1055  * will check if the input looks like valid UTF-8, and returns
1056  * SASLPREP_INVALID_UTF8 if not.
1057  *
1058  * If the string contains prohibited characters (or more precisely, if the
1059  * output string would contain prohibited characters after normalization),
1060  * returns SASLPREP_PROHIBITED.
1061  *
1062  * On success, returns SASLPREP_SUCCESS, and the normalized string in
1063  * *output.
1064  *
1065  * In frontend, the normalized string is malloc'd, and the caller is
1066  * responsible for freeing it.  If an allocation fails, returns
1067  * SASLPREP_OOM.  In backend, the normalized string is palloc'd instead,
1068  * and a failed allocation leads to ereport(ERROR).
1069  */
1070 pg_saslprep_rc
1071 pg_saslprep(const char *input, char **output)
1072 {
1073         pg_wchar   *input_chars = NULL;
1074         pg_wchar   *output_chars = NULL;
1075         int                     input_size;
1076         char       *result;
1077         int                     result_size;
1078         int                     count;
1079         int                     i;
1080         bool            contains_RandALCat;
1081         unsigned char *p;
1082         pg_wchar   *wp;
1083
1084         /* Check that the password isn't stupendously long */
1085         if (strlen(input) > MAX_PASSWORD_LENGTH)
1086         {
1087 #ifndef FRONTEND
1088                 ereport(ERROR,
1089                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1090                                  errmsg("password too long")));
1091 #else
1092                 return SASLPREP_OOM;
1093 #endif
1094         }
1095
1096         /*
1097          * Quick check if the input is pure ASCII.  An ASCII string requires no
1098          * further processing.
1099          */
1100         if (pg_is_ascii_string(input))
1101         {
1102                 *output = STRDUP(input);
1103                 if (!(*output))
1104                         goto oom;
1105                 return SASLPREP_SUCCESS;
1106         }
1107
1108         /*
1109          * Convert the input from UTF-8 to an array of Unicode codepoints.
1110          *
1111          * This also checks that the input is a legal UTF-8 string.
1112          */
1113         input_size = pg_utf8_string_len(input);
1114         if (input_size < 0)
1115         {
1116                 *output = NULL;
1117                 return SASLPREP_INVALID_UTF8;
1118         }
1119
1120         input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
1121         if (!input_chars)
1122                 goto oom;
1123
1124         p = (unsigned char *) input;
1125         for (i = 0; i < input_size; i++)
1126         {
1127                 input_chars[i] = utf8_to_unicode(p);
1128                 p += pg_utf_mblen(p);
1129         }
1130         input_chars[i] = (pg_wchar) '\0';
1131
1132         /*
1133          * The steps below correspond to the steps listed in [RFC3454], Section
1134          * "2. Preparation Overview"
1135          */
1136
1137         /*
1138          * 1) Map -- For each character in the input, check if it has a mapping
1139          * and, if so, replace it with its mapping.
1140          */
1141         count = 0;
1142         for (i = 0; i < input_size; i++)
1143         {
1144                 pg_wchar        code = input_chars[i];
1145
1146                 if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
1147                         input_chars[count++] = 0x0020;
1148                 else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges))
1149                 {
1150                         /* map to nothing */
1151                 }
1152                 else
1153                         input_chars[count++] = code;
1154         }
1155         input_chars[count] = (pg_wchar) '\0';
1156         input_size = count;
1157
1158         if (input_size == 0)
1159                 goto prohibited;                /* don't allow empty password */
1160
1161         /*
1162          * 2) Normalize -- Normalize the result of step 1 using Unicode
1163          * normalization.
1164          */
1165         output_chars = unicode_normalize_kc(input_chars);
1166         if (!output_chars)
1167                 goto oom;
1168
1169         /*
1170          * 3) Prohibit -- Check for any characters that are not allowed in the
1171          * output.  If any are found, return an error.
1172          */
1173         for (i = 0; i < input_size; i++)
1174         {
1175                 pg_wchar        code = input_chars[i];
1176
1177                 if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))
1178                         goto prohibited;
1179                 if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges))
1180                         goto prohibited;
1181         }
1182
1183         /*
1184          * 4) Check bidi -- Possibly check for right-to-left characters, and if
1185          * any are found, make sure that the whole string satisfies the
1186          * requirements for bidirectional strings.  If the string does not satisfy
1187          * the requirements for bidirectional strings, return an error.
1188          *
1189          * [RFC3454], Section "6. Bidirectional Characters" explains in more
1190          * detail what that means:
1191          *
1192          * "In any profile that specifies bidirectional character handling, all
1193          * three of the following requirements MUST be met:
1194          *
1195          * 1) The characters in section 5.8 MUST be prohibited.
1196          *
1197          * 2) If a string contains any RandALCat character, the string MUST NOT
1198          * contain any LCat character.
1199          *
1200          * 3) If a string contains any RandALCat character, a RandALCat character
1201          * MUST be the first character of the string, and a RandALCat character
1202          * MUST be the last character of the string."
1203          */
1204         contains_RandALCat = false;
1205         for (i = 0; i < input_size; i++)
1206         {
1207                 pg_wchar        code = input_chars[i];
1208
1209                 if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))
1210                 {
1211                         contains_RandALCat = true;
1212                         break;
1213                 }
1214         }
1215
1216         if (contains_RandALCat)
1217         {
1218                 pg_wchar        first = input_chars[0];
1219                 pg_wchar        last = input_chars[input_size - 1];
1220
1221                 for (i = 0; i < input_size; i++)
1222                 {
1223                         pg_wchar        code = input_chars[i];
1224
1225                         if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))
1226                                 goto prohibited;
1227                 }
1228
1229                 if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) ||
1230                         !IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges))
1231                         goto prohibited;
1232         }
1233
1234         /*
1235          * Finally, convert the result back to UTF-8.
1236          */
1237         result_size = 0;
1238         for (wp = output_chars; *wp; wp++)
1239         {
1240                 unsigned char buf[4];
1241
1242                 unicode_to_utf8(*wp, buf);
1243                 result_size += pg_utf_mblen(buf);
1244         }
1245
1246         result = ALLOC(result_size + 1);
1247         if (!result)
1248                 goto oom;
1249         p = (unsigned char *) result;
1250         for (wp = output_chars; *wp; wp++)
1251         {
1252                 unicode_to_utf8(*wp, p);
1253                 p += pg_utf_mblen(p);
1254         }
1255         Assert((char *) p == result + result_size);
1256         *p = '\0';
1257
1258         FREE(input_chars);
1259         FREE(output_chars);
1260
1261         *output = result;
1262         return SASLPREP_SUCCESS;
1263
1264 prohibited:
1265         if (input_chars)
1266                 FREE(input_chars);
1267         if (output_chars)
1268                 FREE(output_chars);
1269
1270         return SASLPREP_PROHIBITED;
1271
1272 oom:
1273         if (input_chars)
1274                 FREE(input_chars);
1275         if (output_chars)
1276                 FREE(output_chars);
1277
1278         return SASLPREP_OOM;
1279 }