]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/ascii.c
Update copyright for 2014
[postgresql] / src / backend / utils / adt / ascii.c
1 /*-----------------------------------------------------------------------
2  * ascii.c
3  *       The PostgreSQL routine for string to ascii conversion.
4  *
5  *       Portions Copyright (c) 1999-2014, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *        src/backend/utils/adt/ascii.c
9  *
10  *-----------------------------------------------------------------------
11  */
12 #include "postgres.h"
13
14 #include "mb/pg_wchar.h"
15 #include "utils/ascii.h"
16
17 static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
18                         unsigned char *dest, int enc);
19 static text *encode_to_ascii(text *data, int enc);
20
21
22 /* ----------
23  * to_ascii
24  * ----------
25  */
26 static void
27 pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
28 {
29         unsigned char *x;
30         const unsigned char *ascii;
31         int                     range;
32
33         /*
34          * relevant start for an encoding
35          */
36 #define RANGE_128       128
37 #define RANGE_160       160
38
39         if (enc == PG_LATIN1)
40         {
41                 /*
42                  * ISO-8859-1 <range: 160 -- 255>
43                  */
44                 ascii = (const unsigned char *) "  cL Y  \"Ca  -R     'u .,      ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
45                 range = RANGE_160;
46         }
47         else if (enc == PG_LATIN2)
48         {
49                 /*
50                  * ISO-8859-2 <range: 160 -- 255>
51                  */
52                 ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
53                 range = RANGE_160;
54         }
55         else if (enc == PG_LATIN9)
56         {
57                 /*
58                  * ISO-8859-15 <range: 160 -- 255>
59                  */
60                 ascii = (const unsigned char *) "  cL YS sCa  -R     Zu .z   EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
61                 range = RANGE_160;
62         }
63         else if (enc == PG_WIN1250)
64         {
65                 /*
66                  * Window CP1250 <range: 128 -- 255>
67                  */
68                 ascii = (const unsigned char *) "  ' \"    %S<STZZ `'\"\".--  s>stzz   L A  \"CS  -RZ  ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
69                 range = RANGE_128;
70         }
71         else
72         {
73                 ereport(ERROR,
74                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
75                                  errmsg("encoding conversion from %s to ASCII not supported",
76                                                 pg_encoding_to_char(enc))));
77                 return;                                 /* keep compiler quiet */
78         }
79
80         /*
81          * Encode
82          */
83         for (x = src; x < src_end; x++)
84         {
85                 if (*x < 128)
86                         *dest++ = *x;
87                 else if (*x < range)
88                         *dest++ = ' ';          /* bogus 128 to 'range' */
89                 else
90                         *dest++ = ascii[*x - range];
91         }
92 }
93
94 /* ----------
95  * encode text
96  *
97  * The text datum is overwritten in-place, therefore this coding method
98  * cannot support conversions that change the string length!
99  * ----------
100  */
101 static text *
102 encode_to_ascii(text *data, int enc)
103 {
104         pg_to_ascii((unsigned char *) VARDATA(data),            /* src */
105                                 (unsigned char *) (data) + VARSIZE(data),               /* src end */
106                                 (unsigned char *) VARDATA(data),                /* dest */
107                                 enc);                   /* encoding */
108
109         return data;
110 }
111
112 /* ----------
113  * convert to ASCII - enc is set as 'name' arg.
114  * ----------
115  */
116 Datum
117 to_ascii_encname(PG_FUNCTION_ARGS)
118 {
119         text       *data = PG_GETARG_TEXT_P_COPY(0);
120         char       *encname = NameStr(*PG_GETARG_NAME(1));
121         int                     enc = pg_char_to_encoding(encname);
122
123         if (enc < 0)
124                 ereport(ERROR,
125                                 (errcode(ERRCODE_UNDEFINED_OBJECT),
126                                  errmsg("%s is not a valid encoding name", encname)));
127
128         PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
129 }
130
131 /* ----------
132  * convert to ASCII - enc is set as int4
133  * ----------
134  */
135 Datum
136 to_ascii_enc(PG_FUNCTION_ARGS)
137 {
138         text       *data = PG_GETARG_TEXT_P_COPY(0);
139         int                     enc = PG_GETARG_INT32(1);
140
141         if (!PG_VALID_ENCODING(enc))
142                 ereport(ERROR,
143                                 (errcode(ERRCODE_UNDEFINED_OBJECT),
144                                  errmsg("%d is not a valid encoding code", enc)));
145
146         PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
147 }
148
149 /* ----------
150  * convert to ASCII - current enc is DatabaseEncoding
151  * ----------
152  */
153 Datum
154 to_ascii_default(PG_FUNCTION_ARGS)
155 {
156         text       *data = PG_GETARG_TEXT_P_COPY(0);
157         int                     enc = GetDatabaseEncoding();
158
159         PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
160 }
161
/* ----------
 * ascii_safe_strlcpy
 *
 * Copy a string in an arbitrary backend-safe encoding into dest, producing
 * a clean ASCII string: printable ASCII characters (32..126) and the
 * white-space characters '\n', '\r' and '\t' are kept, and every other
 * byte (control characters including DEL, and all non-ASCII bytes) is
 * replaced with '?'.
 *
 * dest		- output buffer; always NUL-terminated unless destsiz is 0
 * src		- NUL-terminated input string
 * destsiz	- size of dest in bytes; at most destsiz - 1 characters are
 *			  copied
 *
 * Otherwise the behavior is identical to strlcpy(), except that we don't
 * bother with a return value.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* pre-decrement reserves one byte for the terminating nul */
	while (--destsiz > 0)
	{
		/* use unsigned char here so bytes >= 128 compare as positive */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;

		/* Keep printable ASCII characters; 127 (DEL) is a control char */
		if (ch >= 32 && ch <= 126)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}