granicus.if.org Git - postgresql/blob - src/backend/parser/keywords.c
Change case-folding of keywords to conform to SQL99 and fix misbehavior
[postgresql] / src / backend / parser / keywords.c
1 /*-------------------------------------------------------------------------
2  *
3  * keywords.c
4  *        lexical token lookup for reserved words in PostgreSQL
5  *
6  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.89 2001/02/21 18:53:46 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include <ctype.h>
18
19 #include "nodes/parsenodes.h"
20 #include "parser/keywords.h"
21 #include "parser/parse.h"
22
23 /*
24  * List of (keyword-name, keyword-token-value) pairs.
25  *
26  * !!WARNING!!: This list must be sorted, because binary
27  *               search is used to locate entries.
28  */
29 static ScanKeyword ScanKeywords[] = {
30         /* name, value */
31         {"abort", ABORT_TRANS},
32         {"absolute", ABSOLUTE},
33         {"access", ACCESS},
34         {"action", ACTION},
35         {"add", ADD},
36         {"after", AFTER},
37         {"aggregate", AGGREGATE},
38         {"all", ALL},
39         {"alter", ALTER},
40         {"analyse", ANALYSE}, /* British spelling */
41         {"analyze", ANALYZE},
42         {"and", AND},
43         {"any", ANY},
44         {"as", AS},
45         {"asc", ASC},
46         {"at", AT},
47         {"backward", BACKWARD},
48         {"before", BEFORE},
49         {"begin", BEGIN_TRANS},
50         {"between", BETWEEN},
51         {"binary", BINARY},
52         {"bit", BIT},
53         {"both", BOTH},
54         {"by", BY},
55         {"cache", CACHE},
56         {"cascade", CASCADE},
57         {"case", CASE},
58         {"cast", CAST},
59         {"chain", CHAIN},
60         {"char", CHAR},
61         {"character", CHARACTER},
62         {"characteristics", CHARACTERISTICS},
63         {"check", CHECK},
64         {"checkpoint", CHECKPOINT},
65         {"close", CLOSE},
66         {"cluster", CLUSTER},
67         {"coalesce", COALESCE},
68         {"collate", COLLATE},
69         {"column", COLUMN},
70         {"comment", COMMENT},
71         {"commit", COMMIT},
72         {"committed", COMMITTED},
73         {"constraint", CONSTRAINT},
74         {"constraints", CONSTRAINTS},
75         {"copy", COPY},
76         {"create", CREATE},
77         {"createdb", CREATEDB},
78         {"createuser", CREATEUSER},
79         {"cross", CROSS},
80         {"current_date", CURRENT_DATE},
81         {"current_time", CURRENT_TIME},
82         {"current_timestamp", CURRENT_TIMESTAMP},
83         {"current_user", CURRENT_USER},
84         {"cursor", CURSOR},
85         {"cycle", CYCLE},
86         {"database", DATABASE},
87         {"day", DAY_P},
88         {"dec", DEC},
89         {"decimal", DECIMAL},
90         {"declare", DECLARE},
91         {"default", DEFAULT},
92         {"deferrable", DEFERRABLE},
93         {"deferred", DEFERRED},
94         {"delete", DELETE},
95         {"delimiters", DELIMITERS},
96         {"desc", DESC},
97         {"distinct", DISTINCT},
98         {"do", DO},
99         {"double", DOUBLE},
100         {"drop", DROP},
101         {"each", EACH},
102         {"else", ELSE},
103         {"encoding", ENCODING},
104         {"end", END_TRANS},
105         {"escape", ESCAPE},
106         {"except", EXCEPT},
107         {"exclusive", EXCLUSIVE},
108         {"execute", EXECUTE},
109         {"exists", EXISTS},
110         {"explain", EXPLAIN},
111         {"extend", EXTEND},
112         {"extract", EXTRACT},
113         {"false", FALSE_P},
114         {"fetch", FETCH},
115         {"float", FLOAT},
116         {"for", FOR},
117         {"force", FORCE},
118         {"foreign", FOREIGN},
119         {"forward", FORWARD},
120         {"from", FROM},
121         {"full", FULL},
122         {"function", FUNCTION},
123         {"global", GLOBAL},
124         {"grant", GRANT},
125         {"group", GROUP},
126         {"handler", HANDLER},
127         {"having", HAVING},
128         {"hour", HOUR_P},
129         {"ilike", ILIKE},
130         {"immediate", IMMEDIATE},
131         {"in", IN},
132         {"increment", INCREMENT},
133         {"index", INDEX},
134         {"inherits", INHERITS},
135         {"initially", INITIALLY},
136         {"inner", INNER_P},
137         {"inout", INOUT},
138         {"insensitive", INSENSITIVE},
139         {"insert", INSERT},
140         {"instead", INSTEAD},
141         {"intersect", INTERSECT},
142         {"interval", INTERVAL},
143         {"into", INTO},
144         {"is", IS},
145         {"isnull", ISNULL},
146         {"isolation", ISOLATION},
147         {"join", JOIN},
148         {"key", KEY},
149         {"lancompiler", LANCOMPILER},
150         {"language", LANGUAGE},
151         {"leading", LEADING},
152         {"left", LEFT},
153         {"level", LEVEL},
154         {"like", LIKE},
155         {"limit", LIMIT},
156         {"listen", LISTEN},
157         {"load", LOAD},
158         {"local", LOCAL},
159         {"location", LOCATION},
160         {"lock", LOCK_P},
161         {"match", MATCH},
162         {"maxvalue", MAXVALUE},
163         {"minute", MINUTE_P},
164         {"minvalue", MINVALUE},
165         {"mode", MODE},
166         {"month", MONTH_P},
167         {"move", MOVE},
168         {"names", NAMES},
169         {"national", NATIONAL},
170         {"natural", NATURAL},
171         {"nchar", NCHAR},
172         {"new", NEW},
173         {"next", NEXT},
174         {"no", NO},
175         {"nocreatedb", NOCREATEDB},
176         {"nocreateuser", NOCREATEUSER},
177         {"none", NONE},
178         {"not", NOT},
179         {"nothing", NOTHING},
180         {"notify", NOTIFY},
181         {"notnull", NOTNULL},
182         {"null", NULL_P},
183         {"nullif", NULLIF},
184         {"numeric", NUMERIC},
185         {"of", OF},
186         {"off", OFF},
187         {"offset", OFFSET},
188         {"oids", OIDS},
189         {"old", OLD},
190         {"on", ON},
191         {"only", ONLY},
192         {"operator", OPERATOR},
193         {"option", OPTION},
194         {"or", OR},
195         {"order", ORDER},
196         {"out", OUT},
197         {"outer", OUTER_P},
198         {"overlaps", OVERLAPS},
199         {"owner", OWNER},
200         {"partial", PARTIAL},
201         {"password", PASSWORD},
202         {"path", PATH_P},
203         {"pendant", PENDANT},
204         {"position", POSITION},
205         {"precision", PRECISION},
206         {"primary", PRIMARY},
207         {"prior", PRIOR},
208         {"privileges", PRIVILEGES},
209         {"procedural", PROCEDURAL},
210         {"procedure", PROCEDURE},
211         {"public", PUBLIC},
212         {"read", READ},
213         {"references", REFERENCES},
214         {"reindex", REINDEX},
215         {"relative", RELATIVE},
216         {"rename", RENAME},
217         {"reset", RESET},
218         {"restrict", RESTRICT},
219         {"returns", RETURNS},
220         {"revoke", REVOKE},
221         {"right", RIGHT},
222         {"rollback", ROLLBACK},
223         {"row", ROW},
224         {"rule", RULE},
225         {"schema", SCHEMA},
226         {"scroll", SCROLL},
227         {"second", SECOND_P},
228         {"select", SELECT},
229         {"sequence", SEQUENCE},
230         {"serial", SERIAL},
231         {"serializable", SERIALIZABLE},
232         {"session", SESSION},
233         {"session_user", SESSION_USER},
234         {"set", SET},
235         {"setof", SETOF},
236         {"share", SHARE},
237         {"show", SHOW},
238         {"some", SOME},
239         {"start", START},
240         {"statement", STATEMENT},
241         {"stdin", STDIN},
242         {"stdout", STDOUT},
243         {"substring", SUBSTRING},
244         {"sysid", SYSID},
245         {"table", TABLE},
246         {"temp", TEMP},
247         {"template", TEMPLATE},
248         {"temporary", TEMPORARY},
249         {"then", THEN},
250         {"time", TIME},
251         {"timestamp", TIMESTAMP},
252         {"timezone_hour", TIMEZONE_HOUR},
253         {"timezone_minute", TIMEZONE_MINUTE},
254         {"to", TO},
255         {"toast", TOAST},
256         {"trailing", TRAILING},
257         {"transaction", TRANSACTION},
258         {"trigger", TRIGGER},
259         {"trim", TRIM},
260         {"true", TRUE_P},
261         {"truncate", TRUNCATE},
262         {"trusted", TRUSTED},
263         {"type", TYPE_P},
264         {"union", UNION},
265         {"unique", UNIQUE},
266         {"unlisten", UNLISTEN},
267         {"until", UNTIL},
268         {"update", UPDATE},
269         {"user", USER},
270         {"using", USING},
271         {"vacuum", VACUUM},
272         {"valid", VALID},
273         {"values", VALUES},
274         {"varchar", VARCHAR},
275         {"varying", VARYING},
276         {"verbose", VERBOSE},
277         {"version", VERSION},
278         {"view", VIEW},
279         {"when", WHEN},
280         {"where", WHERE},
281         {"with", WITH},
282         {"without", WITHOUT},
283         {"work", WORK},
284         {"year", YEAR_P},
285         {"zone", ZONE},
286 };
287
288 /*
289  * ScanKeywordLookup - see if a given word is a keyword
290  *
291  * Returns a pointer to the ScanKeyword table entry, or NULL if no match.
292  *
293  * The match is done case-insensitively.  Note that we deliberately use a
294  * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
295  * even if we are in a locale where tolower() would produce more or different
296  * translations.  This is to conform to the SQL99 spec, which says that
297  * keywords are to be matched in this way even though non-keyword identifiers
298  * receive a different case-normalization mapping.
299  */
300 ScanKeyword *
301 ScanKeywordLookup(char *text)
302 {
303         int                     len,
304                                 i;
305         char            word[NAMEDATALEN];
306         ScanKeyword *low;
307         ScanKeyword *high;
308
309         len = strlen(text);
310         /* We assume all keywords are shorter than NAMEDATALEN. */
311         if (len >= NAMEDATALEN)
312                 return NULL;
313
314         /*
315          * Apply an ASCII-only downcasing.  We must not use tolower() since
316          * it may produce the wrong translation in some locales (eg, Turkish),
317          * and we don't trust isupper() very much either.  In an ASCII-based
318          * encoding the tests against A and Z are sufficient, but we also check
319          * isupper() so that we will work correctly under EBCDIC.  The actual
320          * case conversion step should work for either ASCII or EBCDIC.
321          */
322         for (i = 0; i < len; i++)
323         {
324                 char    ch = text[i];
325
326                 if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch))
327                         ch += 'a' - 'A';
328                 word[i] = ch;
329         }
330         word[len] = '\0';
331
332         /*
333          * Now do a binary search using plain strcmp() comparison.
334          */
335         low = &ScanKeywords[0];
336         high = endof(ScanKeywords) - 1;
337         while (low <= high)
338         {
339                 ScanKeyword *middle;
340                 int                     difference;
341
342                 middle = low + (high - low) / 2;
343                 difference = strcmp(middle->name, word);
344                 if (difference == 0)
345                         return middle;
346                 else if (difference < 0)
347                         low = middle + 1;
348                 else
349                         high = middle - 1;
350         }
351
352         return NULL;
353 }