From e86fdb0ab224eaa73d907ab16a2dd0e0058699e0 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 26 Aug 2011 10:41:31 -0400 Subject: [PATCH] Support non-ASCII letters in psql variable names. As in the backend, the implementation actually accepts any non-ASCII character, but we only document that you can use letters. --- doc/src/sgml/ref/psql-ref.sgml | 25 +++++----- src/bin/psql/command.c | 6 +-- src/bin/psql/psqlscan.l | 87 +++++++++++++++++++++++++++------- src/bin/psql/variables.c | 34 ++++++++++++- src/bin/psql/variables.h | 4 -- 5 files changed, 119 insertions(+), 37 deletions(-) diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index 2db4adff4b..7e30c57c35 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -2206,7 +2206,7 @@ lo_import 152801 - Valid variable names can contain characters, digits, and + Valid variable names can contain letters, digits, and underscores. See the section below for details. @@ -2461,8 +2461,12 @@ lo_import 152801 psql provides variable substitution features similar to common Unix command shells. Variables are simply name/value pairs, where the value - can be any string of any length. To set variables, use the - psql meta-command + can be any string of any length. The name must consist of letters + (including non-Latin letters), digits, and underscores. + + + + To set a variable, use the psql meta-command \set: testdb=> \set foo bar @@ -2498,16 +2502,15 @@ bar - psql's internal variable names can - consist of letters, numbers, and underscores in any order and any - number of them. A number of these variables are treated specially - by psql. They indicate certain option + A number of these variables are treated specially + by psql. They represent certain option settings that can be changed at run time by altering the value of - the variable or that represent some state of the application. Although - you can use these variables for any other purpose, this is not + the variable, or in some cases represent changeable state of + psql. Although + you can use these variables for other purposes, this is not recommended, as the program behavior might grow really strange - really quickly. By convention, all specially treated variables - consist of all upper-case letters (and possibly numbers and + really quickly. By convention, all specially treated variables' names + consist of all upper-case ASCII letters (and possibly digits and underscores). To ensure maximum compatibility in the future, avoid using such variable names for your own purposes. A list of all specially treated variables follows. diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index d6a925e435..6d9cd6492f 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -995,7 +995,7 @@ exec_command(const char *cmd, if (!SetVariable(pset.vars, opt, result)) { - psql_error("\\%s: error\n", cmd); + psql_error("\\%s: error while setting variable\n", cmd); success = false; } @@ -1096,7 +1096,7 @@ exec_command(const char *cmd, if (!SetVariable(pset.vars, opt0, newval)) { - psql_error("\\%s: error\n", cmd); + psql_error("\\%s: error while setting variable\n", cmd); success = false; } free(newval); @@ -1272,7 +1272,7 @@ exec_command(const char *cmd, } else if (!SetVariable(pset.vars, opt, NULL)) { - psql_error("\\%s: error\n", cmd); + psql_error("\\%s: error while setting variable\n", cmd); success = false; } free(opt); diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l index 8439c865bf..1df8f3aa4f 100644 --- a/src/bin/psql/psqlscan.l +++ b/src/bin/psql/psqlscan.l @@ -120,6 +120,7 @@ static bool var_is_current_source(PsqlScanState state, const char *varname); static YY_BUFFER_STATE prepare_buffer(const char *txt, int len, char **txtcopy); static void emit(const char *txt, int len); +static char *extract_substring(const char *txt, int len); static void escape_variable(bool as_ident); #define ECHO emit(yytext, yyleng) @@ -384,6 +385,9 @@ realfail2 ({integer}|{decimal})[Ee][-+] param \${integer} +/* psql-specific: characters allowed in variable names */ +variable_char [A-Za-z\200-\377_0-9] + other . /* @@ -680,11 +684,12 @@ other . return LEXRES_BACKSLASH; } -:[A-Za-z0-9_]+ { +:{variable_char}+ { /* Possible psql variable substitution */ - const char *varname = yytext + 1; + char *varname; const char *value; + varname = extract_substring(yytext + 1, yyleng - 1); value = GetVariable(pset.vars, varname); if (value) @@ -713,13 +718,15 @@ other . */ ECHO; } + + free(varname); } -:'[A-Za-z0-9_]+' { +:'{variable_char}+' { escape_variable(false); } -:\"[A-Za-z0-9_]+\" { +:\"{variable_char}+\" { escape_variable(true); } @@ -728,13 +735,13 @@ other . * two rules above fails to match completely. */ -:'[A-Za-z0-9_]* { +:'{variable_char}* { /* Throw back everything but the colon */ yyless(1); ECHO; } -:\"[A-Za-z0-9_]* { +:\"{variable_char}* { /* Throw back everything but the colon */ yyless(1); ECHO; @@ -930,15 +937,18 @@ other . } } -:[A-Za-z0-9_]+ { +:{variable_char}+ { /* Possible psql variable substitution */ if (option_type == OT_VERBATIM) ECHO; else { + char *varname; const char *value; - value = GetVariable(pset.vars, yytext + 1); + varname = extract_substring(yytext + 1, yyleng - 1); + value = GetVariable(pset.vars, varname); + free(varname); /* * The variable value is just emitted without any @@ -956,7 +966,7 @@ other . return LEXRES_OK; } -:'[A-Za-z0-9_]+' { +:'{variable_char}+' { if (option_type == OT_VERBATIM) ECHO; else @@ -967,7 +977,7 @@ other . } -:\"[A-Za-z0-9_]+\" { +:\"{variable_char}+\" { if (option_type == OT_VERBATIM) ECHO; else @@ -977,14 +987,14 @@ other . } } -:'[A-Za-z0-9_]* { +:'{variable_char}* { /* Throw back everything but the colon */ yyless(1); ECHO; BEGIN(xslashdefaultarg); } -:\"[A-Za-z0-9_]* { +:\"{variable_char}* { /* Throw back everything but the colon */ yyless(1); ECHO; @@ -1844,16 +1854,58 @@ emit(const char *txt, int len) } } +/* + * extract_substring --- fetch the true value of (part of) the current token + * + * This is like emit(), except that the data is returned as a malloc'd string + * rather than being pushed directly to output_buf. + */ +static char * +extract_substring(const char *txt, int len) +{ + char *result = (char *) pg_malloc(len + 1); + + if (cur_state->safe_encoding) + memcpy(result, txt, len); + else + { + /* Gotta do it the hard way */ + const char *reference = cur_state->refline; + int i; + + reference += (txt - cur_state->curline); + + for (i = 0; i < len; i++) + { + char ch = txt[i]; + + if (ch == (char) 0xFF) + ch = reference[i]; + result[i] = ch; + } + } + result[len] = '\0'; + return result; +} + +/* + * escape_variable --- process :'VARIABLE' or :"VARIABLE" + * + * If the variable name is found, escape its value using the appropriate + * quoting method and emit the value to output_buf. (Since the result is + * surely quoted, there is never any reason to rescan it.) If we don't + * find the variable or the escaping function fails, emit the token as-is. + */ static void escape_variable(bool as_ident) { - char saved_char; + char *varname; const char *value; /* Variable lookup. */ - saved_char = yytext[yyleng - 1]; - yytext[yyleng - 1] = '\0'; - value = GetVariable(pset.vars, yytext + 2); + varname = extract_substring(yytext + 2, yyleng - 3); + value = GetVariable(pset.vars, varname); + free(varname); /* Escaping. */ if (value) @@ -1870,9 +1922,11 @@ escape_variable(bool as_ident) else escaped_value = PQescapeLiteral(pset.db, value, strlen(value)); + if (escaped_value == NULL) { const char *error = PQerrorMessage(pset.db); + psql_error("%s", error); } else @@ -1888,6 +1942,5 @@ escape_variable(bool as_ident) * If we reach this point, some kind of error has occurred. Emit the * original text into the output buffer. */ - yytext[yyleng - 1] = saved_char; emit(yytext, yyleng); } diff --git a/src/bin/psql/variables.c b/src/bin/psql/variables.c index 3825289337..a43c786bf0 100644 --- a/src/bin/psql/variables.c +++ b/src/bin/psql/variables.c @@ -6,10 +6,40 @@ * src/bin/psql/variables.c */ #include "postgres_fe.h" + #include "common.h" #include "variables.h" +/* + * Check whether a variable's name is allowed. + * + * We allow any non-ASCII character, as well as ASCII letters, digits, and + * underscore. Keep this in sync with the definition of variable_char in + * psqlscan.l. + */ +static bool +valid_variable_name(const char *name) +{ + const unsigned char *ptr = (const unsigned char *) name; + + /* Mustn't be zero-length */ + if (*ptr == '\0') + return false; + + while (*ptr) + { + if (IS_HIGHBIT_SET(*ptr) || + strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" + "_0123456789", *ptr) != NULL) + ptr++; + else + return false; + } + + return true; +} + /* * A "variable space" is represented by an otherwise-unused struct _variable * that serves as list header. @@ -158,7 +188,7 @@ SetVariable(VariableSpace space, const char *name, const char *value) if (!space) return false; - if (strspn(name, VALID_VARIABLE_CHARS) != strlen(name)) + if (!valid_variable_name(name)) return false; if (!value) @@ -202,7 +232,7 @@ SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook if (!space) return false; - if (strspn(name, VALID_VARIABLE_CHARS) != strlen(name)) + if (!valid_variable_name(name)) return false; for (previous = space, current = space->next; diff --git a/src/bin/psql/variables.h b/src/bin/psql/variables.h index 4197069b4b..865391dba7 100644 --- a/src/bin/psql/variables.h +++ b/src/bin/psql/variables.h @@ -32,10 +32,6 @@ struct _variable typedef struct _variable *VariableSpace; -/* Allowed chars in a variable's name */ -#define VALID_VARIABLE_CHARS "abcdefghijklmnopqrstuvwxyz"\ - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789_" - VariableSpace CreateVariableSpace(void); const char *GetVariable(VariableSpace space, const char *name); -- 2.40.0