]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/ascii.c
Update copyright for 2019
[postgresql] / src / backend / utils / adt / ascii.c
1 /*-----------------------------------------------------------------------
2  * ascii.c
3  *       The PostgreSQL routine for string to ascii conversion.
4  *
5  *       Portions Copyright (c) 1999-2019, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *        src/backend/utils/adt/ascii.c
9  *
10  *-----------------------------------------------------------------------
11  */
12 #include "postgres.h"
13
14 #include "mb/pg_wchar.h"
15 #include "utils/ascii.h"
16 #include "utils/builtins.h"
17
18 static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
19                         unsigned char *dest, int enc);
20 static text *encode_to_ascii(text *data, int enc);
21
22
23 /* ----------
24  * to_ascii
25  * ----------
26  */
27 static void
28 pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
29 {
30         unsigned char *x;
31         const unsigned char *ascii;
32         int                     range;
33
34         /*
35          * relevant start for an encoding
36          */
37 #define RANGE_128       128
38 #define RANGE_160       160
39
40         if (enc == PG_LATIN1)
41         {
42                 /*
43                  * ISO-8859-1 <range: 160 -- 255>
44                  */
45                 ascii = (const unsigned char *) "  cL Y  \"Ca  -R     'u .,      ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
46                 range = RANGE_160;
47         }
48         else if (enc == PG_LATIN2)
49         {
50                 /*
51                  * ISO-8859-2 <range: 160 -- 255>
52                  */
53                 ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
54                 range = RANGE_160;
55         }
56         else if (enc == PG_LATIN9)
57         {
58                 /*
59                  * ISO-8859-15 <range: 160 -- 255>
60                  */
61                 ascii = (const unsigned char *) "  cL YS sCa  -R     Zu .z   EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
62                 range = RANGE_160;
63         }
64         else if (enc == PG_WIN1250)
65         {
66                 /*
67                  * Window CP1250 <range: 128 -- 255>
68                  */
69                 ascii = (const unsigned char *) "  ' \"    %S<STZZ `'\"\".--  s>stzz   L A  \"CS  -RZ  ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
70                 range = RANGE_128;
71         }
72         else
73         {
74                 ereport(ERROR,
75                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
76                                  errmsg("encoding conversion from %s to ASCII not supported",
77                                                 pg_encoding_to_char(enc))));
78                 return;                                 /* keep compiler quiet */
79         }
80
81         /*
82          * Encode
83          */
84         for (x = src; x < src_end; x++)
85         {
86                 if (*x < 128)
87                         *dest++ = *x;
88                 else if (*x < range)
89                         *dest++ = ' ';          /* bogus 128 to 'range' */
90                 else
91                         *dest++ = ascii[*x - range];
92         }
93 }
94
95 /* ----------
96  * encode text
97  *
98  * The text datum is overwritten in-place, therefore this coding method
99  * cannot support conversions that change the string length!
100  * ----------
101  */
102 static text *
103 encode_to_ascii(text *data, int enc)
104 {
105         pg_to_ascii((unsigned char *) VARDATA(data),    /* src */
106                                 (unsigned char *) (data) + VARSIZE(data),       /* src end */
107                                 (unsigned char *) VARDATA(data),        /* dest */
108                                 enc);                   /* encoding */
109
110         return data;
111 }
112
113 /* ----------
114  * convert to ASCII - enc is set as 'name' arg.
115  * ----------
116  */
117 Datum
118 to_ascii_encname(PG_FUNCTION_ARGS)
119 {
120         text       *data = PG_GETARG_TEXT_P_COPY(0);
121         char       *encname = NameStr(*PG_GETARG_NAME(1));
122         int                     enc = pg_char_to_encoding(encname);
123
124         if (enc < 0)
125                 ereport(ERROR,
126                                 (errcode(ERRCODE_UNDEFINED_OBJECT),
127                                  errmsg("%s is not a valid encoding name", encname)));
128
129         PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
130 }
131
132 /* ----------
133  * convert to ASCII - enc is set as int4
134  * ----------
135  */
136 Datum
137 to_ascii_enc(PG_FUNCTION_ARGS)
138 {
139         text       *data = PG_GETARG_TEXT_P_COPY(0);
140         int                     enc = PG_GETARG_INT32(1);
141
142         if (!PG_VALID_ENCODING(enc))
143                 ereport(ERROR,
144                                 (errcode(ERRCODE_UNDEFINED_OBJECT),
145                                  errmsg("%d is not a valid encoding code", enc)));
146
147         PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
148 }
149
150 /* ----------
151  * convert to ASCII - current enc is DatabaseEncoding
152  * ----------
153  */
154 Datum
155 to_ascii_default(PG_FUNCTION_ARGS)
156 {
157         text       *data = PG_GETARG_TEXT_P_COPY(0);
158         int                     enc = GetDatabaseEncoding();
159
160         PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
161 }
162
/* ----------
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * valid, printable ASCII string: every byte that is neither printable ASCII
 * (32 .. 126) nor one of newline, carriage return or tab is replaced with
 * '?'.  That covers non-ASCII bytes (>= 128) as well as control characters,
 * including DEL (127).
 *
 * Otherwise the behavior is identical to strlcpy(), except that we don't
 * bother with a return value: at most destsiz - 1 bytes are copied and the
 * result is always nul-terminated, unless destsiz is 0, in which case dest
 * is left untouched.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* pre-decrement reserves one byte of dest for the terminator */
	while (--destsiz > 0)
	{
		/* use unsigned char here to avoid compiler warning */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;
		/* Keep printable ASCII characters (DEL, 127, is not printable) */
		if (32 <= ch && ch <= 126)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}