* varlena.c
* Functions for the variable-length built-in types.
*
- * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.137 2005/10/17 16:24:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.151 2006/10/04 00:30:00 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include <ctype.h>
+#include "access/tupmacs.h"
#include "access/tuptoaster.h"
#include "catalog/pg_type.h"
-#include "lib/stringinfo.h"
-#include "libpq/crypt.h"
+#include "libpq/md5.h"
#include "libpq/pqformat.h"
-#include "mb/pg_wchar.h"
-#include "miscadmin.h"
#include "parser/scansup.h"
-#include "utils/array.h"
+#include "regex/regex.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
-#include "regex/regex.h"
typedef struct varlena unknown;
* ereport(ERROR, ...) if bad form.
*
* BUGS:
- * The input is scaned twice.
+ * The input is scanned twice.
* The error checking of input is minimal.
*/
Datum
text *result;
int len;
- /* verify encoding */
len = strlen(inputText);
- pg_verifymbstr(inputText, len, false);
-
result = (text *) palloc(len + VARHDRSZ);
VARATT_SIZEP(result) = len + VARHDRSZ;
str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
- /* verify encoding */
- pg_verifymbstr(str, nbytes, false);
-
result = (text *) palloc(nbytes + VARHDRSZ);
VARATT_SIZEP(result) = nbytes + VARHDRSZ;
memcpy(VARDATA(result), str, nbytes);
(LPWSTR) a1p, a1len / 2);
if (!r)
ereport(ERROR,
- (errmsg("could not convert string to UTF16: %lu",
- GetLastError())));
+ (errmsg("could not convert string to UTF-16: error %lu",
+ GetLastError())));
}
((LPWSTR) a1p)[r] = 0;
(LPWSTR) a2p, a2len / 2);
if (!r)
ereport(ERROR,
- (errmsg("could not convert string to UTF16: %lu",
- GetLastError())));
+ (errmsg("could not convert string to UTF-16: error %lu",
+ GetLastError())));
}
((LPWSTR) a2p)[r] = 0;
if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
* headers */
ereport(ERROR,
- (errmsg("could not compare unicode strings: %d",
- errno)));
+ (errmsg("could not compare Unicode strings: %m")));
if (a1p != a1buf)
pfree(a1p);
result = strcoll(a1p, a2p);
+ /*
+ * In some locales strcoll() can claim that nonidentical strings are
+ * equal. Believing that would be bad news for a number of reasons,
+ * so we follow Perl's lead and sort "equal" strings according to
+ * strcmp().
+ */
+ if (result == 0)
+ result = strcmp(a1p, a2p);
+
if (a1p != a1buf)
pfree(a1p);
if (a2p != a2buf)
text *arg2 = PG_GETARG_TEXT_P(1);
bool result;
- /* fast path for different-length inputs */
+ /*
+ * Since we only care about equality or not-equality, we can avoid all the
+ * expense of strcoll() here, and just do bitwise comparison.
+ */
if (VARSIZE(arg1) != VARSIZE(arg2))
result = false;
else
- result = (text_cmp(arg1, arg2) == 0);
+ result = (strncmp(VARDATA(arg1), VARDATA(arg2),
+ VARSIZE(arg1) - VARHDRSZ) == 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
text *arg2 = PG_GETARG_TEXT_P(1);
bool result;
- /* fast path for different-length inputs */
+ /*
+ * Since we only care about equality or not-equality, we can avoid all the
+ * expense of strcoll() here, and just do bitwise comparison.
+ */
if (VARSIZE(arg1) != VARSIZE(arg2))
result = true;
else
- result = (text_cmp(arg1, arg2) != 0);
+ result = (strncmp(VARDATA(arg1), VARDATA(arg2),
+ VARSIZE(arg1) - VARHDRSZ) != 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
text *buf_text;
text *ret_text;
int curr_posn;
- StringInfo str;
+ StringInfoData str;
if (src_text_len == 0 || from_sub_text_len == 0)
PG_RETURN_TEXT_P(src_text);
if (curr_posn == 0)
PG_RETURN_TEXT_P(src_text);
- str = makeStringInfo();
+ initStringInfo(&str);
buf_text = src_text;
while (curr_posn > 0)
right_text = text_substring(PointerGetDatum(buf_text),
curr_posn + from_sub_text_len, -1, true);
- appendStringInfoText(str, left_text);
- appendStringInfoText(str, to_sub_text);
+ appendStringInfoText(&str, left_text);
+ appendStringInfoText(&str, to_sub_text);
if (buf_text != src_text)
pfree(buf_text);
curr_posn = TEXTPOS(buf_text, from_sub_text);
}
- appendStringInfoText(str, buf_text);
+ appendStringInfoText(&str, buf_text);
if (buf_text != src_text)
pfree(buf_text);
- ret_text = PG_STR_GET_TEXT(str->data);
- pfree(str->data);
- pfree(str);
+ ret_text = PG_STR_GET_TEXT(str.data);
+ pfree(str.data);
PG_RETURN_TEXT_P(ret_text);
}
/*
* check_replace_text_has_escape_char
- * check whether replace_text has escape char.
+ *
+ * check whether replace_text contains escape char.
*/
static bool
check_replace_text_has_escape_char(const text *replace_text)
if (pg_database_encoding_max_length() == 1)
{
for (; p < p_end; p++)
+ {
if (*p == '\\')
return true;
+ }
}
else
{
for (; p < p_end; p += pg_mblen(p))
+ {
if (*p == '\\')
return true;
+ }
}
return false;
/*
* appendStringInfoRegexpSubstr
- * append string by using back references of regexp.
+ *
+ * Append replace_text to str, substituting regexp back references for
+ * \n escapes.
*/
static void
appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
{
const char *p = VARDATA(replace_text);
const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
-
int eml = pg_database_encoding_max_length();
- int substr_start = 1;
- int ch_cnt;
-
- int so;
- int eo;
-
- while (1)
+ for (;;)
{
- /* Find escape char. */
- ch_cnt = 0;
+ const char *chunk_start = p;
+ int so;
+ int eo;
+
+ /* Find next escape char. */
if (eml == 1)
{
for (; p < p_end && *p != '\\'; p++)
- ch_cnt++;
+ /* nothing */ ;
}
else
{
for (; p < p_end && *p != '\\'; p += pg_mblen(p))
- ch_cnt++;
+ /* nothing */ ;
}
- /*
- * Copy the text when there is a text in the left of escape char or
- * escape char is not found.
- */
- if (ch_cnt)
- {
- text *append_text = text_substring(PointerGetDatum(replace_text),
- substr_start, ch_cnt, false);
-
- appendStringInfoText(str, append_text);
- pfree(append_text);
- }
- substr_start += ch_cnt + 1;
+ /* Copy the text we just scanned over, if any. */
+ if (p > chunk_start)
+ appendBinaryStringInfo(str, chunk_start, p - chunk_start);
- if (p >= p_end) /* When escape char is not found. */
+ /* Done if at end of string, else advance over escape char. */
+ if (p >= p_end)
break;
-
- /* See the next character of escape char. */
p++;
- so = eo = -1;
+
+ if (p >= p_end)
+ {
+ /* Escape at very end of input. Treat same as unexpected char */
+ appendStringInfoChar(str, '\\');
+ break;
+ }
if (*p >= '1' && *p <= '9')
{
so = pmatch[idx].rm_so;
eo = pmatch[idx].rm_eo;
p++;
- substr_start++;
}
else if (*p == '&')
{
so = pmatch[0].rm_so;
eo = pmatch[0].rm_eo;
p++;
- substr_start++;
+ }
+ else if (*p == '\\')
+ {
+ /* \\ means transfer one \ to output. */
+ appendStringInfoChar(str, '\\');
+ p++;
+ continue;
+ }
+ else
+ {
+ /*
+ * If escape char is not followed by any expected char, just treat
+ * it as ordinary data to copy. (XXX would it be better to throw
+ * an error?)
+ */
+ appendStringInfoChar(str, '\\');
+ continue;
}
if (so != -1 && eo != -1)
{
- /* Copy the text that is back reference of regexp. */
- text *append_text = text_substring(PointerGetDatum(src_text),
- so + 1, (eo - so), false);
+ /*
+ * Copy the text that is back reference of regexp. Because so and
+ * eo are counted in characters not bytes, it's easiest to use
+ * text_substring to pull out the correct chunk of text.
+ */
+ text *append_text;
+ append_text = text_substring(PointerGetDatum(src_text),
+ so + 1, (eo - so), false);
appendStringInfoText(str, append_text);
pfree(append_text);
}
/*
* replace_text_regexp
+ *
* Replace the text in src_text that matches regexp with replace_text.
+ *
+ * Note: to avoid having to include regex.h in builtins.h, we declare
+ * the regexp argument as void *, but really it's regex_t *.
*/
-Datum
-replace_text_regexp(PG_FUNCTION_ARGS)
+text *
+replace_text_regexp(text *src_text, void *regexp,
+ text *replace_text, bool glob)
{
text *ret_text;
- text *src_text = PG_GETARG_TEXT_P(0);
+ regex_t *re = (regex_t *) regexp;
int src_text_len = VARSIZE(src_text) - VARHDRSZ;
- regex_t *re = (regex_t *) PG_GETARG_POINTER(1);
- text *replace_text = PG_GETARG_TEXT_P(2);
- bool global = PG_GETARG_BOOL(3);
- StringInfo str = makeStringInfo();
- int regexec_result;
+ StringInfoData buf;
regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
pg_wchar *data;
size_t data_len;
int data_pos;
bool have_escape;
+ initStringInfo(&buf);
+
/* Convert data string to wide characters. */
data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
for (search_start = data_pos = 0; search_start <= data_len;)
{
+ int regexec_result;
+
regexec_result = pg_regexec(re,
data,
data_len,
pmatch,
0);
- if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
+ if (regexec_result == REG_NOMATCH)
+ break;
+
+ if (regexec_result != REG_OKAY)
{
char errMsg[100];
- /* re failed??? */
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("regular expression failed: %s", errMsg)));
}
- if (regexec_result == REG_NOMATCH)
- break;
-
/*
- * Copy the text when there is a text in the left of matched position.
+ * Copy the text to the left of the match position. Because we are
+ * working with character not byte indexes, it's easiest to use
+ * text_substring to pull out the needed data.
*/
if (pmatch[0].rm_so - data_pos > 0)
{
- text *left_text = text_substring(PointerGetDatum(src_text),
- data_pos + 1,
- pmatch[0].rm_so - data_pos, false);
+ text *left_text;
- appendStringInfoText(str, left_text);
+ left_text = text_substring(PointerGetDatum(src_text),
+ data_pos + 1,
+ pmatch[0].rm_so - data_pos,
+ false);
+ appendStringInfoText(&buf, left_text);
pfree(left_text);
}
* replace_text has escape characters.
*/
if (have_escape)
- appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
+ appendStringInfoRegexpSubstr(&buf, replace_text, pmatch, src_text);
else
- appendStringInfoText(str, replace_text);
+ appendStringInfoText(&buf, replace_text);
search_start = data_pos = pmatch[0].rm_eo;
/*
* When global option is off, replace the first instance only.
*/
- if (!global)
+ if (!glob)
break;
/*
}
/*
- * Copy the text when there is a text at the right of last matched or
- * regexp is not matched.
+ * Copy the text to the right of the last match.
*/
if (data_pos < data_len)
{
- text *right_text = text_substring(PointerGetDatum(src_text),
- data_pos + 1, -1, true);
+ text *right_text;
- appendStringInfoText(str, right_text);
+ right_text = text_substring(PointerGetDatum(src_text),
+ data_pos + 1, -1, true);
+ appendStringInfoText(&buf, right_text);
pfree(right_text);
}
- ret_text = PG_STR_GET_TEXT(str->data);
- pfree(str->data);
- pfree(str);
+ ret_text = PG_STR_GET_TEXT(buf.data);
+ pfree(buf.data);
pfree(data);
- PG_RETURN_TEXT_P(ret_text);
+ return ret_text;
}
/*
int nitems,
*dims,
ndims;
- char *p;
Oid element_type;
int typlen;
bool typbyval;
char typalign;
- StringInfo result_str = makeStringInfo();
+ StringInfoData buf;
+ bool printed = false;
+ char *p;
+ bits8 *bitmap;
+ int bitmask;
int i;
ArrayMetaState *my_extra;
- p = ARR_DATA_PTR(v);
ndims = ARR_NDIM(v);
dims = ARR_DIMS(v);
nitems = ArrayGetNItems(ndims, dims);
PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
element_type = ARR_ELEMTYPE(v);
+ initStringInfo(&buf);
/*
* We arrange to look up info about element type, including its output
fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
sizeof(ArrayMetaState));
my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
- my_extra->element_type = InvalidOid;
+ my_extra->element_type = ~element_type;
}
if (my_extra->element_type != element_type)
typbyval = my_extra->typbyval;
typalign = my_extra->typalign;
+ p = ARR_DATA_PTR(v);
+ bitmap = ARR_NULLBITMAP(v);
+ bitmask = 1;
+
for (i = 0; i < nitems; i++)
{
Datum itemvalue;
char *value;
- itemvalue = fetch_att(p, typbyval, typlen);
+ /* Get source element, checking for NULL */
+ if (bitmap && (*bitmap & bitmask) == 0)
+ {
+ /* we ignore nulls */
+ }
+ else
+ {
+ itemvalue = fetch_att(p, typbyval, typlen);
- value = DatumGetCString(FunctionCall1(&my_extra->proc,
- itemvalue));
+ value = OutputFunctionCall(&my_extra->proc, itemvalue);
- if (i > 0)
- appendStringInfo(result_str, "%s%s", fldsep, value);
- else
- appendStringInfoString(result_str, value);
+ if (printed)
+ appendStringInfo(&buf, "%s%s", fldsep, value);
+ else
+ appendStringInfoString(&buf, value);
+ printed = true;
+
+ p = att_addlength(p, typlen, PointerGetDatum(p));
+ p = (char *) att_align(p, typalign);
+ }
- p = att_addlength(p, typlen, PointerGetDatum(p));
- p = (char *) att_align(p, typalign);
+ /* advance bitmap pointer if any */
+ if (bitmap)
+ {
+ bitmask <<= 1;
+ if (bitmask == 0x100)
+ {
+ bitmap++;
+ bitmask = 1;
+ }
+ }
}
- PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
+ PG_RETURN_TEXT_P(PG_STR_GET_TEXT(buf.data));
}
#define HEXBASE 16