*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.152 2006/10/07 00:11:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.153 2006/11/08 19:22:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "catalog/pg_type.h"
#include "libpq/md5.h"
#include "libpq/pqformat.h"
+#include "miscadmin.h"
#include "parser/scansup.h"
#include "regex/regex.h"
#include "utils/builtins.h"
PG_RETURN_TEXT_P(result);
}
+/*
+ * charlen_to_bytelen()
+ *	Return how many bytes the first n characters located at *p take up.
+ *
+ * The caller must guarantee that n characters really are present at p;
+ * no terminating null is required, and none is looked for.
+ */
+static int
+charlen_to_bytelen(const char *p, int n)
+{
+	const char *cursor = p;
+
+	/* With a single-byte encoding, character and byte counts coincide */
+	if (pg_database_encoding_max_length() == 1)
+		return n;
+
+	/* Otherwise step over the characters one at a time */
+	while (n > 0)
+	{
+		cursor += pg_mblen(cursor);
+		n--;
+	}
+
+	return cursor - p;
+}
+
/*
* text_substr()
* Return a substring starting at the specified position.
* functions. Note that the argument is passed as a Datum, to indicate that
* it may still be in compressed/toasted form. We can avoid detoasting all
* of it in some cases.
+ *
+ * The result is always a freshly palloc'd datum.
*/
static text *
text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
*/
slice_size = (S1 + L1) * eml;
}
- slice = DatumGetTextPSlice(str, slice_start, slice_size);
+
+ /*
+ * If we're working with an untoasted source, no need to do an
+ * extra copying step.
+ */
+ if (VARATT_IS_EXTENDED(str))
+ slice = DatumGetTextPSlice(str, slice_start, slice_size);
+ else
+ slice = (text *) DatumGetPointer(str);
/* see if we got back an empty string */
if ((VARSIZE(slice) - VARHDRSZ) == 0)
+ {
+ if (slice != (text *) DatumGetPointer(str))
+ pfree(slice);
return PG_STR_GET_TEXT("");
+ }
/* Now we can get the actual length of the slice in MB characters */
slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
* says to return a zero-length string.
*/
if (S1 > slice_strlen)
+ {
+ if (slice != (text *) DatumGetPointer(str))
+ pfree(slice);
return PG_STR_GET_TEXT("");
+ }
/*
* Adjust L1 and E1 now that we know the slice string length. Again
VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
memcpy(VARDATA(ret), s, (p - s));
+ if (slice != (text *) DatumGetPointer(str))
+ pfree(slice);
+
return ret;
}
else
int src_text_len = TEXTLEN(src_text);
int from_sub_text_len = TEXTLEN(from_sub_text);
TextPositionState state;
- text *chunk_text;
text *ret_text;
int start_posn;
int curr_posn;
+ int chunk_len;
+ char *start_ptr;
StringInfoData str;
if (src_text_len == 0 || from_sub_text_len == 0)
PG_RETURN_TEXT_P(src_text);
}
+ /* start_ptr points to the start_posn'th character of src_text */
+ start_ptr = (char *) VARDATA(src_text);
+
initStringInfo(&str);
do
{
- chunk_text = text_substring(PointerGetDatum(src_text),
- start_posn,
- curr_posn - start_posn,
- false);
- appendStringInfoText(&str, chunk_text);
- pfree(chunk_text);
+ /* copy the data skipped over by last text_position_next() */
+ chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
+ appendBinaryStringInfo(&str, start_ptr, chunk_len);
appendStringInfoText(&str, to_sub_text);
- start_posn = curr_posn + from_sub_text_len;
+ start_posn = curr_posn;
+ start_ptr += chunk_len;
+ start_posn += from_sub_text_len;
+ start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
+
curr_posn = text_position_next(start_posn, &state);
}
while (curr_posn > 0);
- /* copy trailing chunk */
- chunk_text = text_substring(PointerGetDatum(src_text),
- start_posn,
- -1,
- true);
- appendStringInfoText(&str, chunk_text);
- pfree(chunk_text);
+ /* copy trailing data */
+ chunk_len = ((char *) src_text + VARSIZE(src_text)) - start_ptr;
+ appendBinaryStringInfo(&str, start_ptr, chunk_len);
text_position_cleanup(&state);
* appendStringInfoRegexpSubstr
*
* Append replace_text to str, substituting regexp back references for
- * \n escapes.
+ * \n escapes. start_ptr is the start of the match in the source string,
+ * at logical character position data_pos.
*/
static void
appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
- regmatch_t *pmatch, text *src_text)
+ regmatch_t *pmatch,
+ char *start_ptr, int data_pos)
{
const char *p = VARDATA(replace_text);
const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
if (so != -1 && eo != -1)
{
/*
- * Copy the text that is back reference of regexp. Because so and
- * eo are counted in characters not bytes, it's easiest to use
- * text_substring to pull out the correct chunk of text.
+ * Copy the text that the regexp back reference refers to. Note that
+ * so and eo are counted in characters, not bytes.
*/
- text *append_text;
-
- append_text = text_substring(PointerGetDatum(src_text),
- so + 1, (eo - so), false);
- appendStringInfoText(str, append_text);
- pfree(append_text);
+ char *chunk_start;
+ int chunk_len;
+
+ Assert(so >= data_pos);
+ chunk_start = start_ptr;
+ chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
+ chunk_len = charlen_to_bytelen(chunk_start, eo - so);
+ appendBinaryStringInfo(str, chunk_start, chunk_len);
}
}
}
size_t data_len;
int search_start;
int data_pos;
+ char *start_ptr;
bool have_escape;
initStringInfo(&buf);
/* Check whether replace_text has escape char. */
have_escape = check_replace_text_has_escape_char(replace_text);
- for (search_start = data_pos = 0; search_start <= data_len;)
+ /* start_ptr points to the data_pos'th character of src_text */
+ start_ptr = (char *) VARDATA(src_text);
+ data_pos = 0;
+
+ search_start = 0;
+ while (search_start <= data_len)
{
int regexec_result;
+ CHECK_FOR_INTERRUPTS();
+
regexec_result = pg_regexec(re,
data,
data_len,
}
/*
- * Copy the text to the left of the match position. Because we are
- * working with character not byte indexes, it's easiest to use
- * text_substring to pull out the needed data.
+ * Copy the text to the left of the match position. Note that the
+ * match offsets we are given are character indexes, not byte indexes.
*/
if (pmatch[0].rm_so - data_pos > 0)
{
- text *left_text;
-
- left_text = text_substring(PointerGetDatum(src_text),
- data_pos + 1,
- pmatch[0].rm_so - data_pos,
- false);
- appendStringInfoText(&buf, left_text);
- pfree(left_text);
+ int chunk_len;
+
+ chunk_len = charlen_to_bytelen(start_ptr,
+ pmatch[0].rm_so - data_pos);
+ appendBinaryStringInfo(&buf, start_ptr, chunk_len);
+ /*
+ * Advance start_ptr over that text, to avoid multiple rescans
+ * of it if the replace_text contains multiple back-references.
+ */
+ start_ptr += chunk_len;
+ data_pos = pmatch[0].rm_so;
}
/*
* replace_text has escape characters.
*/
if (have_escape)
- appendStringInfoRegexpSubstr(&buf, replace_text, pmatch, src_text);
+ appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
+ start_ptr, data_pos);
else
appendStringInfoText(&buf, replace_text);
- search_start = data_pos = pmatch[0].rm_eo;
+ /* Advance start_ptr and data_pos over the matched text. */
+ start_ptr += charlen_to_bytelen(start_ptr,
+ pmatch[0].rm_eo - data_pos);
+ data_pos = pmatch[0].rm_eo;
/*
* When global option is off, replace the first instance only.
/*
* Search from next character when the matching text is zero width.
*/
+ search_start = data_pos;
if (pmatch[0].rm_so == pmatch[0].rm_eo)
search_start++;
}
*/
if (data_pos < data_len)
{
- text *right_text;
+ int chunk_len;
- right_text = text_substring(PointerGetDatum(src_text),
- data_pos + 1, -1, true);
- appendStringInfoText(&buf, right_text);
- pfree(right_text);
+ chunk_len = ((char *) src_text + VARSIZE(src_text)) - start_ptr;
+ appendBinaryStringInfo(&buf, start_ptr, chunk_len);
}
ret_text = PG_STR_GET_TEXT(buf.data);
int fldnum;
int start_posn;
int end_posn;
+ int chunk_len;
+ char *start_ptr;
text *result_text;
ArrayBuildState *astate = NULL;
text_position_setup(inputstring, fldsep, &state);
start_posn = 1;
+ /* start_ptr points to the start_posn'th character of inputstring */
+ start_ptr = (char *) VARDATA(inputstring);
+
for (fldnum = 1;; fldnum++) /* field number is 1 based */
{
end_posn = text_position_next(start_posn, &state);
if (end_posn == 0)
{
/* fetch last field */
- result_text = text_substring(PointerGetDatum(inputstring),
- start_posn,
- -1,
- true);
+ chunk_len = ((char *) inputstring + VARSIZE(inputstring)) - start_ptr;
}
else
{
/* fetch non-last field */
- result_text = text_substring(PointerGetDatum(inputstring),
- start_posn,
- end_posn - start_posn,
- false);
+ chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
}
+ /* must build a temp text datum to pass to accumArrayResult */
+ result_text = (text *) palloc(VARHDRSZ + chunk_len);
+ VARATT_SIZEP(result_text) = VARHDRSZ + chunk_len;
+ memcpy(VARDATA(result_text), start_ptr, chunk_len);
+
/* stash away this field */
astate = accumArrayResult(astate,
PointerGetDatum(result_text),
TEXTOID,
CurrentMemoryContext);
+ pfree(result_text);
+
if (end_posn == 0)
break;
- start_posn = end_posn + fldsep_len;
+
+ start_posn = end_posn;
+ start_ptr += chunk_len;
+ start_posn += fldsep_len;
+ start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
}
text_position_cleanup(&state);