From 4ed9f1d9b776677ad26cd352dbc0810d7bed6a8f Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Tue, 30 Jan 2007 22:29:23 +0000 Subject: [PATCH] Update documentation for backslashes to mention escape string syntax more, and standard_conforming_strings less, because in the future non-E strings will not treat backslashes specially. Also use E'' strings where backslashes are used in examples. (The existing examples would have drawn warnings.) Backpatch to 8.2.X. --- doc/src/sgml/array.sgml | 18 +++++++------- doc/src/sgml/datatype.sgml | 46 +++++++++++++++++------------------ doc/src/sgml/func.sgml | 50 ++++++++++++++++++++------------------ doc/src/sgml/libpq.sgml | 6 ++--- doc/src/sgml/plperl.sgml | 10 ++++---- doc/src/sgml/plpgsql.sgml | 12 +++------ doc/src/sgml/pltcl.sgml | 12 ++++----- doc/src/sgml/rowtypes.sgml | 7 +++--- doc/src/sgml/xfunc.sgml | 8 +++--- 9 files changed, 82 insertions(+), 87 deletions(-) diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml index 3ed8ce9c04..f888ffc1a7 100644 --- a/doc/src/sgml/array.sgml +++ b/doc/src/sgml/array.sgml @@ -1,4 +1,4 @@ - + Arrays @@ -597,17 +597,17 @@ SELECT f1[1][-2][3] AS e1, f1[1][-1][5] AS e2 - As shown previously, when writing an array value you may write double + As shown previously, when writing an array value you can write double quotes around any individual array element. You must do so if the element value would otherwise confuse the array-value parser. For example, elements containing curly braces, commas (or whatever the delimiter character is), double quotes, backslashes, or leading or trailing whitespace must be double-quoted. Empty strings and strings matching the word NULL must be quoted, too. To put a double quote or - backslash in a - quoted array element value, precede it with a backslash. Alternatively, you - can use backslash-escaping to protect all data characters that would - otherwise be taken as array syntax. 
+ backslash in a quoted array element value, use escape string syntax + and precede it with a backslash. Alternatively, you can use + backslash-escaping to protect all data characters that would otherwise + be taken as array syntax. @@ -625,16 +625,16 @@ SELECT f1[1][-2][3] AS e1, f1[1][-1][5] AS e2 backslashes you need. For example, to insert a text array value containing a backslash and a double quote, you'd need to write -INSERT ... VALUES ('{"\\\\","\\""}'); +INSERT ... VALUES (E'{"\\\\","\\""}'); - The string-literal processor removes one level of backslashes, so that + The escape string processor removes one level of backslashes, so that what arrives at the array-value parser looks like {"\\","\""}. In turn, the strings fed to the text data type's input routine become \ and " respectively. (If we were working with a data type whose input routine also treated backslashes specially, bytea for example, we might need as many as eight backslashes in the command to get one backslash into the stored array element.) - Dollar quoting (see <xref linkend="sql-syntax-dollar-quoting">) may be + Dollar quoting (see <xref linkend="sql-syntax-dollar-quoting">) can be used to avoid the need to double backslashes. diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 10d5a34cf9..1446a8b0fd 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1,4 +1,4 @@ - + Data Types @@ -1152,11 +1152,9 @@ SELECT b, char_length(b) FROM test2; of a string literal in an SQL statement. In general, to escape an octet, it is converted into the three-digit octal number equivalent of its decimal octet value, and preceded - by two backslashes (or one backslash if - standard_conforming_strings is off). - <xref linkend="datatype-binary-sqlesc"> shows the characters - that must be escaped, and gives the alternate escape sequences - where applicable. + by two backslashes. + <xref linkend="datatype-binary-sqlesc"> shows the characters that must be escaped, and gives the alternate + escape sequences where applicable. 
@@ -1176,32 +1174,32 @@ SELECT b, char_length(b) FROM test2; 0 zero octet - '\\000' - SELECT '\\000'::bytea; + E'\\000' + SELECT E'\\000'::bytea; \000 39 single quote - '\'' or '\\047' - SELECT '\''::bytea; + '''' or E'\\047' + SELECT E'\''::bytea; ' 92 backslash - '\\\\' or '\\134' - SELECT '\\\\'::bytea; + E'\\\\' or E'\\134' + SELECT E'\\\\'::bytea; \\ 0 to 31 and 127 to 255 non-printable octets - '\\xxx' (octal value) - SELECT '\\001'::bytea; + E'\\xxx' (octal value) + SELECT E'\\001'::bytea; \001 @@ -1224,18 +1222,18 @@ SELECT b, char_length(b) FROM test2; string written as a string literal must pass through two parse phases in the PostgreSQL server. The first backslash of each pair is interpreted as an escape - character by the string-literal parser (assuming - standard_conforming_strings is off) - and is therefore consumed, leaving the second backslash of the - pair. The remaining backslash is then recognized by the + character by the string-literal parser (assuming escape string + syntax is used) and is therefore consumed, leaving the second backslash of the + pair. (Dollar-quoted strings can be used to avoid this level + of escaping.) The remaining backslash is then recognized by the bytea input function as starting either a three digit octal value or escaping another backslash. For example, - a string literal passed to the server as '\\001' + a string literal passed to the server as E'\\001' becomes \001 after passing through the - string-literal parser. The \001 is then sent + escape string parser. The \001 is then sent to the bytea input function, where it is converted to a single octet with a decimal value of 1. Note that the - apostrophe character is not treated specially by bytea, + single-quote character is not treated specially by bytea, so it follows the normal rules for string literals. (See also .) 
@@ -1269,7 +1267,7 @@ SELECT b, char_length(b) FROM test2; 92 backslash \\ - SELECT '\\134'::bytea; + SELECT E'\\134'::bytea; \\ @@ -1277,7 +1275,7 @@ SELECT b, char_length(b) FROM test2; 0 to 31 and 127 to 255 non-printable octets \xxx (octal value) - SELECT '\\001'::bytea; + SELECT E'\\001'::bytea; \001 @@ -1285,7 +1283,7 @@ SELECT b, char_length(b) FROM test2; 32 to 126 printable octets client character set representation - SELECT '\\176'::bytea; + SELECT E'\\176'::bytea; ~ diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 87a4ce48de..02c95a6f22 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ - + Functions and Operators @@ -1339,7 +1339,7 @@ Encode binary data to ASCII-only representation. Supported types are: base64, hex, escape. - encode( '123\\000\\001', 'base64') + encode( E'123\\000\\001', 'base64') MTIzAAE= @@ -1439,7 +1439,7 @@ Return the given string suitably quoted to be used as a string literal in an SQL statement string. - Embedded quotes and backslashes are properly doubled. + Embedded single-quotes and backslashes are properly doubled. 
quote_literal( 'O\'Reilly') 'O''Reilly' @@ -2393,7 +2393,7 @@ concatenation - '\\\\Post'::bytea || '\\047gres\\000'::bytea + E'\\\\Post'::bytea || E'\\047gres\\000'::bytea \\Post'gres\000 @@ -2406,7 +2406,7 @@ get_bit - get_bit('Th\\000omas'::bytea, 45) + get_bit(E'Th\\000omas'::bytea, 45) 1 @@ -2419,7 +2419,7 @@ get_byte - get_byte('Th\\000omas'::bytea, 4) + get_byte(E'Th\\000omas'::bytea, 4) 109 @@ -2427,7 +2427,7 @@ octet_length(string) int Number of bytes in binary string - octet_length( 'jo\\000se'::bytea) + octet_length( E'jo\\000se'::bytea) 5 @@ -2435,7 +2435,7 @@ position(substring in string) int Location of specified substring - position('\\000om'::bytea in 'Th\\000omas'::bytea) + position(E'\\000om'::bytea in E'Th\\000omas'::bytea) 3 @@ -2449,7 +2449,7 @@ set_bit - set_bit('Th\\000omas'::bytea, 45, 0) + set_bit(E'Th\\000omas'::bytea, 45, 0) Th\000omAs @@ -2463,7 +2463,7 @@ set_byte - set_byte('Th\\000omas'::bytea, 4, 64) + set_byte(E'Th\\000omas'::bytea, 4, 64) Th\000o@as @@ -2476,7 +2476,7 @@ substring - substring('Th\\000omas'::bytea from 2 for 3) + substring(E'Th\\000omas'::bytea from 2 for 3) h\000o @@ -2492,7 +2492,7 @@ bytes from the start and end of string - trim('\\000'::bytea from '\\000Tom\\000'::bytea) + trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea) Tom @@ -2530,7 +2530,7 @@ in bytes from the start and end of string - btrim('\\000trim\\000'::bytea, '\\000'::bytea) + btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea) trim @@ -2544,7 +2544,7 @@ Decode binary string from string previously encoded with encode. Parameter type is same as in encode. - decode('123\\000456', 'escape') + decode(E'123\\000456', 'escape') 123\000456 @@ -2558,7 +2558,7 @@ Encode binary string to ASCII-only representation. Supported types are: base64, hex, escape. 
- encode('123\\000456'::bytea, 'escape') + encode(E'123\\000456'::bytea, 'escape') 123\000456 @@ -2577,7 +2577,7 @@ binary strings, length - length('jo\\000se'::bytea) + length(E'jo\\000se'::bytea) 5 @@ -2588,7 +2588,7 @@ Calculates the MD5 hash of string, returning the result in hexadecimal - md5('Th\\000omas'::bytea) + md5(E'Th\\000omas'::bytea) 8ab2d3c9689aaf18 b4958c334c82d8b1 @@ -2812,7 +2812,8 @@ cast(-44 as bit(12)) 111111010100 Note that the backslash already has a special meaning in string literals, so to write a pattern constant that contains a backslash - you must write two backslashes in an SQL statement. Thus, writing a pattern + you must write two backslashes in an SQL statement (assuming escape + string syntax is used). Thus, writing a pattern that actually matches a literal backslash means writing four backslashes in the statement. You can avoid this by selecting a different escape character with ESCAPE; then a backslash is not special @@ -3106,7 +3107,7 @@ substring('foobar' from 'o(.)b') o substring matching the entire pattern should be inserted. Write \\ if you need to put a literal backslash in the replacement text. (As always, remember to double backslashes written in literal - constant strings.) + constant strings, assuming escape string syntax is used.) The flags parameter is an optional text string containing zero or more single-letter flags that change the function's behavior. Flag i specifies case-insensitive @@ -3121,7 +3122,7 @@ regexp_replace('foobarbaz', 'b..', 'X') fooXbaz regexp_replace('foobarbaz', 'b..', 'X', 'g') fooXX -regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g') +regexp_replace('foobarbaz', 'b(..)', E'X\\1Y', 'g') fooXarYXazY @@ -3283,7 +3284,8 @@ regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g') Remember that the backslash (\) already has a special meaning in PostgreSQL string literals. To write a pattern constant that contains a backslash, - you must write two backslashes in the statement. 
+ you must write two backslashes in the statement, assuming escape + string syntax is used. @@ -3594,7 +3596,7 @@ regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g') Keep in mind that an escape's leading \ will need to be doubled when entering the pattern as an SQL string constant. For example: -'123' ~ '^\\d{3}' true +'123' ~ E'^\\d{3}' true @@ -4756,10 +4758,10 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); If you want to have a double quote in the output you must - precede it with a backslash, for example '\\"YYYY + precede it with a backslash, for example E'\\"YYYY Month\\"'. (Two backslashes are necessary because the backslash already - has a special meaning in a string constant.) + has a special meaning when using the escape string syntax.) diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 0fc0f76fd2..d7b32f9a23 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -1,4 +1,4 @@ - + <application>libpq</application> - C Library @@ -916,7 +916,7 @@ in a numeric form that is much easier to compare against. If no value for standard_conforming_strings is reported, -applications may assume it is false, that is, backslashes +applications may assume it is off, that is, backslashes are treated as escapes in string literals. Also, the presence of this parameter may be taken as an indication that the escape string syntax (E'...') is accepted. @@ -2494,7 +2494,7 @@ unsigned char *PQescapeByteaConn(PGconn *conn, of a bytea literal in an SQL statement. In general, to escape a byte, it is converted into the three digit octal number equal to the octet value, and preceded by - one or two backslashes. The single quote (') and backslash + usually two backslashes. The single quote (') and backslash (\) characters have special alternative escape sequences. See for more information. 
PQescapeByteaConn performs this diff --git a/doc/src/sgml/plperl.sgml b/doc/src/sgml/plperl.sgml index a94163e7be..ce95e1ed08 100644 --- a/doc/src/sgml/plperl.sgml +++ b/doc/src/sgml/plperl.sgml @@ -1,4 +1,4 @@ - + PL/Perl - Perl Procedural Language @@ -80,10 +80,10 @@ $$ LANGUAGE plperl; the function body to be written as a string constant. It is usually most convenient to use dollar quoting (see <xref linkend="sql-syntax-dollar-quoting">) for the string constant. - If you choose to use regular single-quoted string constant syntax, - you must escape single quote marks (') and backslashes - (\) used in the body of the function, typically by - doubling them (see <xref linkend="sql-syntax-strings">). + If you choose to use escape string syntax E'', + you must double the single quote marks (') and backslashes + (\) used in the body of the function + (see <xref linkend="sql-syntax-strings">). diff --git a/doc/src/sgml/plpgsql.sgml b/doc/src/sgml/plpgsql.sgml index ec276abb82..7eea925512 100644 --- a/doc/src/sgml/plpgsql.sgml +++ b/doc/src/sgml/plpgsql.sgml @@ -1,4 +1,4 @@ - + <application>PL/pgSQL</application> - <acronym>SQL</acronym> Procedural Language @@ -288,7 +288,8 @@ $$ LANGUAGE plpgsql; CREATE FUNCTION as a string literal. If you write the string literal in the ordinary way with surrounding single quotes, then any single quotes inside the function body - must be doubled; likewise any backslashes must be doubled. + must be doubled; likewise any backslashes must be doubled (assuming + escape string syntax is used). Doubling quotes is at best tedious, and in more complicated cases the code can become downright incomprehensible, because you can easily find yourself needing half a dozen or more adjacent quote marks. @@ -434,13 +435,6 @@ a_output := a_output || $$ if v_$$ || referrer_keys.kind || $$ like '$$ - - A variant approach is to escape quotation marks in the function body - with a backslash rather than by doubling them. With this method - you'll find yourself writing things like \'\' instead - of ''''. Some find this easier to keep track of, some - do not. 
- diff --git a/doc/src/sgml/pltcl.sgml b/doc/src/sgml/pltcl.sgml index 3242c891e7..33e24fa0ea 100644 --- a/doc/src/sgml/pltcl.sgml +++ b/doc/src/sgml/pltcl.sgml @@ -1,4 +1,4 @@ - + PL/Tcl - Tcl Procedural Language @@ -387,11 +387,11 @@ CREATE FUNCTION t1_count(integer, integer) RETURNS integer AS $$ $$ LANGUAGE pltcl; - We need backslashes inside the query string given to - spi_prepare to ensure that the - $n markers will be passed - through to spi_prepare as-is, and not replaced by Tcl - variable substitution. + We need backslashes inside the query string given to + spi_prepare to ensure that the + $n markers will be passed + through to spi_prepare as-is, and not replaced by Tcl + variable substitution. diff --git a/doc/src/sgml/rowtypes.sgml b/doc/src/sgml/rowtypes.sgml index 42bbf53916..36f468e6ac 100644 --- a/doc/src/sgml/rowtypes.sgml +++ b/doc/src/sgml/rowtypes.sgml @@ -1,4 +1,4 @@ - + Composite Types @@ -294,11 +294,12 @@ INSERT INTO mytab (complex_col.r, complex_col.i) VALUES(1.1, 2.2); Remember that what you write in an SQL command will first be interpreted as a string literal, and then as a composite. This doubles the number of - backslashes you need. For example, to insert a text field + backslashes you need (assuming escape string syntax is used). + For example, to insert a text field containing a double quote and a backslash in a composite value, you'd need to write -INSERT ... VALUES ('("\\"\\\\")'); +INSERT ... VALUES (E'("\\"\\\\")'); The string-literal processor removes one level of backslashes, so that what arrives at the composite-value parser looks like diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml index 97c5dbb578..ca9d4261ff 100644 --- a/doc/src/sgml/xfunc.sgml +++ b/doc/src/sgml/xfunc.sgml @@ -1,4 +1,4 @@ - + User-Defined Functions @@ -147,9 +147,9 @@ SELECT clean_emp(); most convenient to use dollar quoting (see ) for the string constant. 
If you choose to use regular single-quoted string constant syntax, - you must escape single quote marks (') and backslashes - (\) used in the body of the function, typically by - doubling them (see <xref linkend="sql-syntax-strings">). + you must double single quote marks (') and backslashes + (\) (assuming escape string syntax) in the body of + the function (see <xref linkend="sql-syntax-strings">). -- 2.40.0