]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c
Re-run pgindent, fixing a problem where comment lines after a blank
[postgresql] / src / backend / utils / adt / varlena.c
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  *        Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.141 2005/11/22 18:17:23 momjian Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include <ctype.h>
18
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/crypt.h"
23 #include "libpq/pqformat.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "regex/regex.h"
28 #include "utils/array.h"
29 #include "utils/builtins.h"
30 #include "utils/lsyscache.h"
31 #include "utils/pg_locale.h"
32
33
/* The "unknown" type shares the generic varlena struct layout. */
typedef struct varlena unknown;

/*
 * fmgr-style accessors for "unknown" values: detoast the Datum (optionally
 * making a copy) and cast the result to (unknown *).
 */
#define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
#define DatumGetUnknownPCopy(X)         ((unknown *) PG_DETOAST_DATUM_COPY(X))
#define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)

/* Run function argument number arg_ through textout() to get a C string. */
#define PG_TEXTARG_GET_STR(arg_) \
        DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
/* Run a text pointer through textout() to get a C string. */
#define PG_TEXT_GET_STR(textp_) \
        DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
/* Run a C string through textin() to get a text pointer. */
#define PG_STR_GET_TEXT(str_) \
        DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
/* Length of a text value as computed by text_length(). */
#define TEXTLEN(textp) \
        text_length(PointerGetDatum(textp))
/* One-based position of the first match of from_sub_text in buf_text. */
#define TEXTPOS(buf_text, from_sub_text) \
        text_position(buf_text, from_sub_text, 1)
/*
 * The part of buf_text that precedes the first match of from_sub_text.
 * When there is no match TEXTPOS yields 0, so the requested length is -1
 * and text_substring() raises its "negative substring length" error;
 * callers should only use this once a match is known to exist.
 */
#define LEFT(buf_text, from_sub_text) \
        text_substring(PointerGetDatum(buf_text), \
                                        1, \
                                        TEXTPOS(buf_text, from_sub_text) - 1, false)
56
57 static int      text_cmp(text *arg1, text *arg2);
58 static int32 text_length(Datum str);
59 static int32 text_position(text *t1, text *t2, int matchnum);
60 static text *text_substring(Datum str,
61                            int32 start,
62                            int32 length,
63                            bool length_not_specified);
64
65 static void appendStringInfoText(StringInfo str, const text *t);
66
67
68 /*****************************************************************************
69  *       USER I/O ROUTINES                                                                                                               *
70  *****************************************************************************/
71
72
/* Convert between a digit character and its numeric value. */
#define VAL(CH)                 ((CH) - '0')
#define DIG(VAL)                ((VAL) + '0')
75
76 /*
77  *              byteain                 - converts from printable representation of byte array
78  *
79  *              Non-printable characters must be passed as '\nnn' (octal) and are
80  *              converted to internal form.  '\' must be passed as '\\'.
81  *              ereport(ERROR, ...) if bad form.
82  *
83  *              BUGS:
84  *                              The input is scaned twice.
85  *                              The error checking of input is minimal.
86  */
87 Datum
88 byteain(PG_FUNCTION_ARGS)
89 {
90         char       *inputText = PG_GETARG_CSTRING(0);
91         char       *tp;
92         char       *rp;
93         int                     byte;
94         bytea      *result;
95
96         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
97         {
98                 if (tp[0] != '\\')
99                         tp++;
100                 else if ((tp[0] == '\\') &&
101                                  (tp[1] >= '0' && tp[1] <= '3') &&
102                                  (tp[2] >= '0' && tp[2] <= '7') &&
103                                  (tp[3] >= '0' && tp[3] <= '7'))
104                         tp += 4;
105                 else if ((tp[0] == '\\') &&
106                                  (tp[1] == '\\'))
107                         tp += 2;
108                 else
109                 {
110                         /*
111                          * one backslash, not followed by 0 or ### valid octal
112                          */
113                         ereport(ERROR,
114                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
115                                          errmsg("invalid input syntax for type bytea")));
116                 }
117         }
118
119         byte += VARHDRSZ;
120         result = (bytea *) palloc(byte);
121         VARATT_SIZEP(result) = byte;    /* set varlena length */
122
123         tp = inputText;
124         rp = VARDATA(result);
125         while (*tp != '\0')
126         {
127                 if (tp[0] != '\\')
128                         *rp++ = *tp++;
129                 else if ((tp[0] == '\\') &&
130                                  (tp[1] >= '0' && tp[1] <= '3') &&
131                                  (tp[2] >= '0' && tp[2] <= '7') &&
132                                  (tp[3] >= '0' && tp[3] <= '7'))
133                 {
134                         byte = VAL(tp[1]);
135                         byte <<= 3;
136                         byte += VAL(tp[2]);
137                         byte <<= 3;
138                         *rp++ = byte + VAL(tp[3]);
139                         tp += 4;
140                 }
141                 else if ((tp[0] == '\\') &&
142                                  (tp[1] == '\\'))
143                 {
144                         *rp++ = '\\';
145                         tp += 2;
146                 }
147                 else
148                 {
149                         /*
150                          * We should never get here. The first pass should not allow it.
151                          */
152                         ereport(ERROR,
153                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
154                                          errmsg("invalid input syntax for type bytea")));
155                 }
156         }
157
158         PG_RETURN_BYTEA_P(result);
159 }
160
161 /*
162  *              byteaout                - converts to printable representation of byte array
163  *
164  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
165  *              '\\'.
166  *
167  *              NULL vlena should be an error--returning string with NULL for now.
168  */
169 Datum
170 byteaout(PG_FUNCTION_ARGS)
171 {
172         bytea      *vlena = PG_GETARG_BYTEA_P(0);
173         char       *result;
174         char       *vp;
175         char       *rp;
176         int                     val;                    /* holds unprintable chars */
177         int                     i;
178         int                     len;
179
180         len = 1;                                        /* empty string has 1 char */
181         vp = VARDATA(vlena);
182         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
183         {
184                 if (*vp == '\\')
185                         len += 2;
186                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
187                         len += 4;
188                 else
189                         len++;
190         }
191         rp = result = (char *) palloc(len);
192         vp = VARDATA(vlena);
193         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
194         {
195                 if (*vp == '\\')
196                 {
197                         *rp++ = '\\';
198                         *rp++ = '\\';
199                 }
200                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
201                 {
202                         val = *vp;
203                         rp[0] = '\\';
204                         rp[3] = DIG(val & 07);
205                         val >>= 3;
206                         rp[2] = DIG(val & 07);
207                         val >>= 3;
208                         rp[1] = DIG(val & 03);
209                         rp += 4;
210                 }
211                 else
212                         *rp++ = *vp;
213         }
214         *rp = '\0';
215         PG_RETURN_CSTRING(result);
216 }
217
218 /*
219  *              bytearecv                       - converts external binary format to bytea
220  */
221 Datum
222 bytearecv(PG_FUNCTION_ARGS)
223 {
224         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
225         bytea      *result;
226         int                     nbytes;
227
228         nbytes = buf->len - buf->cursor;
229         result = (bytea *) palloc(nbytes + VARHDRSZ);
230         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
231         pq_copymsgbytes(buf, VARDATA(result), nbytes);
232         PG_RETURN_BYTEA_P(result);
233 }
234
235 /*
236  *              byteasend                       - converts bytea to binary format
237  *
238  * This is a special case: just copy the input...
239  */
240 Datum
241 byteasend(PG_FUNCTION_ARGS)
242 {
243         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
244
245         PG_RETURN_BYTEA_P(vlena);
246 }
247
248
249 /*
250  *              textin                  - converts "..." to internal representation
251  */
252 Datum
253 textin(PG_FUNCTION_ARGS)
254 {
255         char       *inputText = PG_GETARG_CSTRING(0);
256         text       *result;
257         int                     len;
258
259         /* verify encoding */
260         len = strlen(inputText);
261         pg_verifymbstr(inputText, len, false);
262
263         result = (text *) palloc(len + VARHDRSZ);
264         VARATT_SIZEP(result) = len + VARHDRSZ;
265
266         memcpy(VARDATA(result), inputText, len);
267
268         PG_RETURN_TEXT_P(result);
269 }
270
271 /*
272  *              textout                 - converts internal representation to "..."
273  */
274 Datum
275 textout(PG_FUNCTION_ARGS)
276 {
277         text       *t = PG_GETARG_TEXT_P(0);
278         int                     len;
279         char       *result;
280
281         len = VARSIZE(t) - VARHDRSZ;
282         result = (char *) palloc(len + 1);
283         memcpy(result, VARDATA(t), len);
284         result[len] = '\0';
285
286         PG_RETURN_CSTRING(result);
287 }
288
289 /*
290  *              textrecv                        - converts external binary format to text
291  */
292 Datum
293 textrecv(PG_FUNCTION_ARGS)
294 {
295         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
296         text       *result;
297         char       *str;
298         int                     nbytes;
299
300         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
301
302         /* verify encoding */
303         pg_verifymbstr(str, nbytes, false);
304
305         result = (text *) palloc(nbytes + VARHDRSZ);
306         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
307         memcpy(VARDATA(result), str, nbytes);
308         pfree(str);
309         PG_RETURN_TEXT_P(result);
310 }
311
312 /*
313  *              textsend                        - converts text to binary format
314  */
315 Datum
316 textsend(PG_FUNCTION_ARGS)
317 {
318         text       *t = PG_GETARG_TEXT_P(0);
319         StringInfoData buf;
320
321         pq_begintypsend(&buf);
322         pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
323         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
324 }
325
326
327 /*
328  *              unknownin                       - converts "..." to internal representation
329  */
330 Datum
331 unknownin(PG_FUNCTION_ARGS)
332 {
333         char       *str = PG_GETARG_CSTRING(0);
334
335         /* representation is same as cstring */
336         PG_RETURN_CSTRING(pstrdup(str));
337 }
338
339 /*
340  *              unknownout                      - converts internal representation to "..."
341  */
342 Datum
343 unknownout(PG_FUNCTION_ARGS)
344 {
345         /* representation is same as cstring */
346         char       *str = PG_GETARG_CSTRING(0);
347
348         PG_RETURN_CSTRING(pstrdup(str));
349 }
350
351 /*
352  *              unknownrecv                     - converts external binary format to unknown
353  */
354 Datum
355 unknownrecv(PG_FUNCTION_ARGS)
356 {
357         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
358         char       *str;
359         int                     nbytes;
360
361         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
362         /* representation is same as cstring */
363         PG_RETURN_CSTRING(str);
364 }
365
366 /*
367  *              unknownsend                     - converts unknown to binary format
368  */
369 Datum
370 unknownsend(PG_FUNCTION_ARGS)
371 {
372         /* representation is same as cstring */
373         char       *str = PG_GETARG_CSTRING(0);
374         StringInfoData buf;
375
376         pq_begintypsend(&buf);
377         pq_sendtext(&buf, str, strlen(str));
378         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
379 }
380
381
382 /* ========== PUBLIC ROUTINES ========== */
383
384 /*
385  * textlen -
386  *        returns the logical length of a text*
387  *         (which is less than the VARSIZE of the text*)
388  */
389 Datum
390 textlen(PG_FUNCTION_ARGS)
391 {
392         Datum           str = PG_GETARG_DATUM(0);
393
394         /* try to avoid decompressing argument */
395         PG_RETURN_INT32(text_length(str));
396 }
397
398 /*
399  * text_length -
400  *      Does the real work for textlen()
401  *
402  *      This is broken out so it can be called directly by other string processing
403  *      functions.      Note that the argument is passed as a Datum, to indicate that
404  *      it may still be in compressed form.  We can avoid decompressing it at all
405  *      in some cases.
406  */
407 static int32
408 text_length(Datum str)
409 {
410         /* fastpath when max encoding length is one */
411         if (pg_database_encoding_max_length() == 1)
412                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
413         else
414         {
415                 text       *t = DatumGetTextP(str);
416
417                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
418                                                                                          VARSIZE(t) - VARHDRSZ));
419         }
420 }
421
422 /*
423  * textoctetlen -
424  *        returns the physical length of a text*
425  *         (which is less than the VARSIZE of the text*)
426  */
427 Datum
428 textoctetlen(PG_FUNCTION_ARGS)
429 {
430         Datum           str = PG_GETARG_DATUM(0);
431
432         /* We need not detoast the input at all */
433         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
434 }
435
436 /*
437  * textcat -
438  *        takes two text* and returns a text* that is the concatenation of
439  *        the two.
440  *
441  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
442  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
443  * Allocate space for output in all cases.
444  * XXX - thomas 1997-07-10
445  */
446 Datum
447 textcat(PG_FUNCTION_ARGS)
448 {
449         text       *t1 = PG_GETARG_TEXT_P(0);
450         text       *t2 = PG_GETARG_TEXT_P(1);
451         int                     len1,
452                                 len2,
453                                 len;
454         text       *result;
455         char       *ptr;
456
457         len1 = VARSIZE(t1) - VARHDRSZ;
458         if (len1 < 0)
459                 len1 = 0;
460
461         len2 = VARSIZE(t2) - VARHDRSZ;
462         if (len2 < 0)
463                 len2 = 0;
464
465         len = len1 + len2 + VARHDRSZ;
466         result = (text *) palloc(len);
467
468         /* Set size of result string... */
469         VARATT_SIZEP(result) = len;
470
471         /* Fill data field of result string... */
472         ptr = VARDATA(result);
473         if (len1 > 0)
474                 memcpy(ptr, VARDATA(t1), len1);
475         if (len2 > 0)
476                 memcpy(ptr + len1, VARDATA(t2), len2);
477
478         PG_RETURN_TEXT_P(result);
479 }
480
481 /*
482  * text_substr()
483  * Return a substring starting at the specified position.
484  * - thomas 1997-12-31
485  *
486  * Input:
487  *      - string
488  *      - starting position (is one-based)
489  *      - string length
490  *
491  * If the starting position is zero or less, then return from the start of the string
492  *      adjusting the length to be consistent with the "negative start" per SQL92.
493  * If the length is less than zero, return the remaining string.
494  *
495  * Added multibyte support.
496  * - Tatsuo Ishii 1998-4-21
497  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
498  * Formerly returned the entire string; now returns a portion.
499  * - Thomas Lockhart 1998-12-10
500  * Now uses faster TOAST-slicing interface
501  * - John Gray 2002-02-22
502  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
503  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
504  * error; if E < 1, return '', not entire string). Fixed MB related bug when
505  * S > LC and < LC + 4 sometimes garbage characters are returned.
506  * - Joe Conway 2002-08-10
507  */
508 Datum
509 text_substr(PG_FUNCTION_ARGS)
510 {
511         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
512                                                                         PG_GETARG_INT32(1),
513                                                                         PG_GETARG_INT32(2),
514                                                                         false));
515 }
516
517 /*
518  * text_substr_no_len -
519  *        Wrapper to avoid opr_sanity failure due to
520  *        one function accepting a different number of args.
521  */
522 Datum
523 text_substr_no_len(PG_FUNCTION_ARGS)
524 {
525         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
526                                                                         PG_GETARG_INT32(1),
527                                                                         -1, true));
528 }
529
530 /*
531  * text_substring -
532  *      Does the real work for text_substr() and text_substr_no_len()
533  *
534  *      This is broken out so it can be called directly by other string processing
535  *      functions.      Note that the argument is passed as a Datum, to indicate that
536  *      it may still be in compressed/toasted form.  We can avoid detoasting all
537  *      of it in some cases.
538  */
539 static text *
540 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
541 {
542         int32           eml = pg_database_encoding_max_length();
543         int32           S = start;              /* start position */
544         int32           S1;                             /* adjusted start position */
545         int32           L1;                             /* adjusted substring length */
546
547         /* life is easy if the encoding max length is 1 */
548         if (eml == 1)
549         {
550                 S1 = Max(S, 1);
551
552                 if (length_not_specified)               /* special case - get length to end of
553                                                                                  * string */
554                         L1 = -1;
555                 else
556                 {
557                         /* end position */
558                         int                     E = S + length;
559
560                         /*
561                          * A negative value for L is the only way for the end position to
562                          * be before the start. SQL99 says to throw an error.
563                          */
564                         if (E < S)
565                                 ereport(ERROR,
566                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
567                                                  errmsg("negative substring length not allowed")));
568
569                         /*
570                          * A zero or negative value for the end position can happen if the
571                          * start was negative or one. SQL99 says to return a zero-length
572                          * string.
573                          */
574                         if (E < 1)
575                                 return PG_STR_GET_TEXT("");
576
577                         L1 = E - S1;
578                 }
579
580                 /*
581                  * If the start position is past the end of the string, SQL99 says to
582                  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
583                  * that for us. Convert to zero-based starting position
584                  */
585                 return DatumGetTextPSlice(str, S1 - 1, L1);
586         }
587         else if (eml > 1)
588         {
589                 /*
590                  * When encoding max length is > 1, we can't get LC without
591                  * detoasting, so we'll grab a conservatively large slice now and go
592                  * back later to do the right thing
593                  */
594                 int32           slice_start;
595                 int32           slice_size;
596                 int32           slice_strlen;
597                 text       *slice;
598                 int32           E1;
599                 int32           i;
600                 char       *p;
601                 char       *s;
602                 text       *ret;
603
604                 /*
605                  * if S is past the end of the string, the tuple toaster will return a
606                  * zero-length string to us
607                  */
608                 S1 = Max(S, 1);
609
610                 /*
611                  * We need to start at position zero because there is no way to know
612                  * in advance which byte offset corresponds to the supplied start
613                  * position.
614                  */
615                 slice_start = 0;
616
617                 if (length_not_specified)               /* special case - get length to end of
618                                                                                  * string */
619                         slice_size = L1 = -1;
620                 else
621                 {
622                         int                     E = S + length;
623
624                         /*
625                          * A negative value for L is the only way for the end position to
626                          * be before the start. SQL99 says to throw an error.
627                          */
628                         if (E < S)
629                                 ereport(ERROR,
630                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
631                                                  errmsg("negative substring length not allowed")));
632
633                         /*
634                          * A zero or negative value for the end position can happen if the
635                          * start was negative or one. SQL99 says to return a zero-length
636                          * string.
637                          */
638                         if (E < 1)
639                                 return PG_STR_GET_TEXT("");
640
641                         /*
642                          * if E is past the end of the string, the tuple toaster will
643                          * truncate the length for us
644                          */
645                         L1 = E - S1;
646
647                         /*
648                          * Total slice size in bytes can't be any longer than the start
649                          * position plus substring length times the encoding max length.
650                          */
651                         slice_size = (S1 + L1) * eml;
652                 }
653                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
654
655                 /* see if we got back an empty string */
656                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
657                         return PG_STR_GET_TEXT("");
658
659                 /* Now we can get the actual length of the slice in MB characters */
660                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
661
662                 /*
663                  * Check that the start position wasn't > slice_strlen. If so, SQL99
664                  * says to return a zero-length string.
665                  */
666                 if (S1 > slice_strlen)
667                         return PG_STR_GET_TEXT("");
668
669                 /*
670                  * Adjust L1 and E1 now that we know the slice string length. Again
671                  * remember that S1 is one based, and slice_start is zero based.
672                  */
673                 if (L1 > -1)
674                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
675                 else
676                         E1 = slice_start + 1 + slice_strlen;
677
678                 /*
679                  * Find the start position in the slice; remember S1 is not zero based
680                  */
681                 p = VARDATA(slice);
682                 for (i = 0; i < S1 - 1; i++)
683                         p += pg_mblen(p);
684
685                 /* hang onto a pointer to our start position */
686                 s = p;
687
688                 /*
689                  * Count the actual bytes used by the substring of the requested
690                  * length.
691                  */
692                 for (i = S1; i < E1; i++)
693                         p += pg_mblen(p);
694
695                 ret = (text *) palloc(VARHDRSZ + (p - s));
696                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
697                 memcpy(VARDATA(ret), s, (p - s));
698
699                 return ret;
700         }
701         else
702                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
703
704         /* not reached: suppress compiler warning */
705         return NULL;
706 }
707
708 /*
709  * textpos -
710  *        Return the position of the specified substring.
711  *        Implements the SQL92 POSITION() function.
712  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
713  * - thomas 1997-07-27
714  */
715 Datum
716 textpos(PG_FUNCTION_ARGS)
717 {
718         text       *str = PG_GETARG_TEXT_P(0);
719         text       *search_str = PG_GETARG_TEXT_P(1);
720
721         PG_RETURN_INT32(text_position(str, search_str, 1));
722 }
723
724 /*
725  * text_position -
726  *      Does the real work for textpos()
727  *
728  * Inputs:
729  *              t1 - string to be searched
730  *              t2 - pattern to match within t1
731  *              matchnum - number of the match to be found (1 is the first match)
732  * Result:
733  *              Character index of the first matched char, starting from 1,
734  *              or 0 if no match.
735  *
736  *      This is broken out so it can be called directly by other string processing
737  *      functions.
738  */
739 static int32
740 text_position(text *t1, text *t2, int matchnum)
741 {
742         int                     match = 0,
743                                 pos = 0,
744                                 p,
745                                 px,
746                                 len1,
747                                 len2;
748
749         if (matchnum <= 0)
750                 return 0;                               /* result for 0th match */
751
752         if (VARSIZE(t2) <= VARHDRSZ)
753                 return 1;                               /* result for empty pattern */
754
755         len1 = VARSIZE(t1) - VARHDRSZ;
756         len2 = VARSIZE(t2) - VARHDRSZ;
757
758         if (pg_database_encoding_max_length() == 1)
759         {
760                 /* simple case - single byte encoding */
761                 char       *p1,
762                                    *p2;
763
764                 p1 = VARDATA(t1);
765                 p2 = VARDATA(t2);
766
767                 /* no use in searching str past point where search_str will fit */
768                 px = (len1 - len2);
769
770                 for (p = 0; p <= px; p++)
771                 {
772                         if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
773                         {
774                                 if (++match == matchnum)
775                                 {
776                                         pos = p + 1;
777                                         break;
778                                 }
779                         }
780                         p1++;
781                 }
782         }
783         else
784         {
785                 /* not as simple - multibyte encoding */
786                 pg_wchar   *p1,
787                                    *p2,
788                                    *ps1,
789                                    *ps2;
790
791                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
792                 (void) pg_mb2wchar_with_len(VARDATA(t1), p1, len1);
793                 len1 = pg_wchar_strlen(p1);
794                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
795                 (void) pg_mb2wchar_with_len(VARDATA(t2), p2, len2);
796                 len2 = pg_wchar_strlen(p2);
797
798                 /* no use in searching str past point where search_str will fit */
799                 px = (len1 - len2);
800
801                 for (p = 0; p <= px; p++)
802                 {
803                         if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
804                         {
805                                 if (++match == matchnum)
806                                 {
807                                         pos = p + 1;
808                                         break;
809                                 }
810                         }
811                         p1++;
812                 }
813
814                 pfree(ps1);
815                 pfree(ps2);
816         }
817
818         return pos;
819 }
820
/* varstr_cmp()
 * Comparison function for text strings with given lengths.
 * Includes locale support, but must copy strings to temporary memory
 *	to allow null-termination for inputs to strcoll().
 *
 * Returns a negative value, zero, or a positive value when arg1 sorts
 * before, equal to, or after arg2.  Callers must test only the sign: the
 * raw strncmp()/strcmp()/strcoll()/wcscoll() result is passed through, so
 * the magnitude is not limited to 1.
 *
 * Zero is reported only when the byte-wise comparison also finds the two
 * strings equal; a locale-level "equal" verdict alone is never trusted
 * (see the tie-break comments in the body).
 */
int
varstr_cmp(char *arg1, int len1, char *arg2, int len2)
{
	int			result;

	/*
	 * Unfortunately, there is no strncoll(), so in the non-C locale case we
	 * have to do some memory copying.  This turns out to be significantly
	 * slower, so we optimize the case where LC_COLLATE is C.  We also try to
	 * optimize relatively-short strings by avoiding palloc/pfree overhead.
	 */
	if (lc_collate_is_c())
	{
		result = strncmp(arg1, arg2, Min(len1, len2));
		if ((result == 0) && (len1 != len2))
			result = (len1 < len2) ? -1 : 1;
	}
	else
	{
#define STACKBUFLEN		1024

		char		a1buf[STACKBUFLEN];
		char		a2buf[STACKBUFLEN];
		char	   *a1p,
				   *a2p;

#ifdef WIN32
		/* Win32 does not have UTF-8, so we need to map to UTF-16 */
		if (GetDatabaseEncoding() == PG_UTF8)
		{
			int			a1len;
			int			a2len;
			int			r;

			/* each input byte yields at most one UTF-16 unit, plus a terminator */
			if (len1 >= STACKBUFLEN / 2)
			{
				a1len = len1 * 2 + 2;
				a1p = palloc(a1len);
			}
			else
			{
				a1len = STACKBUFLEN;
				a1p = a1buf;
			}
			if (len2 >= STACKBUFLEN / 2)
			{
				a2len = len2 * 2 + 2;
				a2p = palloc(a2len);
			}
			else
			{
				a2len = STACKBUFLEN;
				a2p = a2buf;
			}

			/* stupid Microsloth API does not work for zero-length input */
			if (len1 == 0)
				r = 0;
			else
			{
				r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
										(LPWSTR) a1p, a1len / 2);
				if (!r)
					ereport(ERROR,
					 (errmsg("could not convert string to UTF-16: error %lu",
							 GetLastError())));
			}
			((LPWSTR) a1p)[r] = 0;

			if (len2 == 0)
				r = 0;
			else
			{
				r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
										(LPWSTR) a2p, a2len / 2);
				if (!r)
					ereport(ERROR,
					 (errmsg("could not convert string to UTF-16: error %lu",
							 GetLastError())));
			}
			((LPWSTR) a2p)[r] = 0;

			errno = 0;
			result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
			if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw
										 * headers */
				ereport(ERROR,
						(errmsg("could not compare Unicode strings: %m")));

			/*
			 * wcscoll() may claim that nonidentical strings are equal.
			 * Accepting that would make this function disagree with callers
			 * that use a length-based fast path for (in)equality, so break
			 * the tie with a byte-wise comparison of the original UTF-8
			 * input.
			 */
			if (result == 0)
			{
				result = strncmp(arg1, arg2, Min(len1, len2));
				if ((result == 0) && (len1 != len2))
					result = (len1 < len2) ? -1 : 1;
			}

			if (a1p != a1buf)
				pfree(a1p);
			if (a2p != a2buf)
				pfree(a2p);

			return result;
		}
#endif   /* WIN32 */

		/* use the stack buffers when the string plus terminator fits */
		if (len1 >= STACKBUFLEN)
			a1p = (char *) palloc(len1 + 1);
		else
			a1p = a1buf;
		if (len2 >= STACKBUFLEN)
			a2p = (char *) palloc(len2 + 1);
		else
			a2p = a2buf;

		memcpy(a1p, arg1, len1);
		a1p[len1] = '\0';
		memcpy(a2p, arg2, len2);
		a2p[len2] = '\0';

		result = strcoll(a1p, a2p);

		/*
		 * In some locales strcoll() can claim that nonidentical strings are
		 * equal.  Believing that would make this function disagree with
		 * callers that use a length-based fast path for (in)equality, so
		 * order such "equal" strings according to strcmp().
		 */
		if (result == 0)
			result = strcmp(a1p, a2p);

		if (a1p != a1buf)
			pfree(a1p);
		if (a2p != a2buf)
			pfree(a2p);
	}

	return result;
}
949
950
951 /* text_cmp()
952  * Internal comparison function for text strings.
953  * Returns -1, 0 or 1
954  */
955 static int
956 text_cmp(text *arg1, text *arg2)
957 {
958         char       *a1p,
959                            *a2p;
960         int                     len1,
961                                 len2;
962
963         a1p = VARDATA(arg1);
964         a2p = VARDATA(arg2);
965
966         len1 = VARSIZE(arg1) - VARHDRSZ;
967         len2 = VARSIZE(arg2) - VARHDRSZ;
968
969         return varstr_cmp(a1p, len1, a2p, len2);
970 }
971
972 /*
973  * Comparison functions for text strings.
974  *
975  * Note: btree indexes need these routines not to leak memory; therefore,
976  * be careful to free working copies of toasted datums.  Most places don't
977  * need to be so careful.
978  */
979
980 Datum
981 texteq(PG_FUNCTION_ARGS)
982 {
983         text       *arg1 = PG_GETARG_TEXT_P(0);
984         text       *arg2 = PG_GETARG_TEXT_P(1);
985         bool            result;
986
987         /* fast path for different-length inputs */
988         if (VARSIZE(arg1) != VARSIZE(arg2))
989                 result = false;
990         else
991                 result = (text_cmp(arg1, arg2) == 0);
992
993         PG_FREE_IF_COPY(arg1, 0);
994         PG_FREE_IF_COPY(arg2, 1);
995
996         PG_RETURN_BOOL(result);
997 }
998
999 Datum
1000 textne(PG_FUNCTION_ARGS)
1001 {
1002         text       *arg1 = PG_GETARG_TEXT_P(0);
1003         text       *arg2 = PG_GETARG_TEXT_P(1);
1004         bool            result;
1005
1006         /* fast path for different-length inputs */
1007         if (VARSIZE(arg1) != VARSIZE(arg2))
1008                 result = true;
1009         else
1010                 result = (text_cmp(arg1, arg2) != 0);
1011
1012         PG_FREE_IF_COPY(arg1, 0);
1013         PG_FREE_IF_COPY(arg2, 1);
1014
1015         PG_RETURN_BOOL(result);
1016 }
1017
1018 Datum
1019 text_lt(PG_FUNCTION_ARGS)
1020 {
1021         text       *arg1 = PG_GETARG_TEXT_P(0);
1022         text       *arg2 = PG_GETARG_TEXT_P(1);
1023         bool            result;
1024
1025         result = (text_cmp(arg1, arg2) < 0);
1026
1027         PG_FREE_IF_COPY(arg1, 0);
1028         PG_FREE_IF_COPY(arg2, 1);
1029
1030         PG_RETURN_BOOL(result);
1031 }
1032
1033 Datum
1034 text_le(PG_FUNCTION_ARGS)
1035 {
1036         text       *arg1 = PG_GETARG_TEXT_P(0);
1037         text       *arg2 = PG_GETARG_TEXT_P(1);
1038         bool            result;
1039
1040         result = (text_cmp(arg1, arg2) <= 0);
1041
1042         PG_FREE_IF_COPY(arg1, 0);
1043         PG_FREE_IF_COPY(arg2, 1);
1044
1045         PG_RETURN_BOOL(result);
1046 }
1047
1048 Datum
1049 text_gt(PG_FUNCTION_ARGS)
1050 {
1051         text       *arg1 = PG_GETARG_TEXT_P(0);
1052         text       *arg2 = PG_GETARG_TEXT_P(1);
1053         bool            result;
1054
1055         result = (text_cmp(arg1, arg2) > 0);
1056
1057         PG_FREE_IF_COPY(arg1, 0);
1058         PG_FREE_IF_COPY(arg2, 1);
1059
1060         PG_RETURN_BOOL(result);
1061 }
1062
1063 Datum
1064 text_ge(PG_FUNCTION_ARGS)
1065 {
1066         text       *arg1 = PG_GETARG_TEXT_P(0);
1067         text       *arg2 = PG_GETARG_TEXT_P(1);
1068         bool            result;
1069
1070         result = (text_cmp(arg1, arg2) >= 0);
1071
1072         PG_FREE_IF_COPY(arg1, 0);
1073         PG_FREE_IF_COPY(arg2, 1);
1074
1075         PG_RETURN_BOOL(result);
1076 }
1077
1078 Datum
1079 bttextcmp(PG_FUNCTION_ARGS)
1080 {
1081         text       *arg1 = PG_GETARG_TEXT_P(0);
1082         text       *arg2 = PG_GETARG_TEXT_P(1);
1083         int32           result;
1084
1085         result = text_cmp(arg1, arg2);
1086
1087         PG_FREE_IF_COPY(arg1, 0);
1088         PG_FREE_IF_COPY(arg2, 1);
1089
1090         PG_RETURN_INT32(result);
1091 }
1092
1093
1094 Datum
1095 text_larger(PG_FUNCTION_ARGS)
1096 {
1097         text       *arg1 = PG_GETARG_TEXT_P(0);
1098         text       *arg2 = PG_GETARG_TEXT_P(1);
1099         text       *result;
1100
1101         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1102
1103         PG_RETURN_TEXT_P(result);
1104 }
1105
1106 Datum
1107 text_smaller(PG_FUNCTION_ARGS)
1108 {
1109         text       *arg1 = PG_GETARG_TEXT_P(0);
1110         text       *arg2 = PG_GETARG_TEXT_P(1);
1111         text       *result;
1112
1113         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1114
1115         PG_RETURN_TEXT_P(result);
1116 }
1117
1118
1119 /*
1120  * The following operators support character-by-character comparison
1121  * of text data types, to allow building indexes suitable for LIKE
1122  * clauses.
1123  */
1124
1125 static int
1126 internal_text_pattern_compare(text *arg1, text *arg2)
1127 {
1128         int                     result;
1129
1130         result = memcmp(VARDATA(arg1), VARDATA(arg2),
1131                                         Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1132         if (result != 0)
1133                 return result;
1134         else if (VARSIZE(arg1) < VARSIZE(arg2))
1135                 return -1;
1136         else if (VARSIZE(arg1) > VARSIZE(arg2))
1137                 return 1;
1138         else
1139                 return 0;
1140 }
1141
1142
1143 Datum
1144 text_pattern_lt(PG_FUNCTION_ARGS)
1145 {
1146         text       *arg1 = PG_GETARG_TEXT_P(0);
1147         text       *arg2 = PG_GETARG_TEXT_P(1);
1148         int                     result;
1149
1150         result = internal_text_pattern_compare(arg1, arg2);
1151
1152         PG_FREE_IF_COPY(arg1, 0);
1153         PG_FREE_IF_COPY(arg2, 1);
1154
1155         PG_RETURN_BOOL(result < 0);
1156 }
1157
1158
1159 Datum
1160 text_pattern_le(PG_FUNCTION_ARGS)
1161 {
1162         text       *arg1 = PG_GETARG_TEXT_P(0);
1163         text       *arg2 = PG_GETARG_TEXT_P(1);
1164         int                     result;
1165
1166         result = internal_text_pattern_compare(arg1, arg2);
1167
1168         PG_FREE_IF_COPY(arg1, 0);
1169         PG_FREE_IF_COPY(arg2, 1);
1170
1171         PG_RETURN_BOOL(result <= 0);
1172 }
1173
1174
1175 Datum
1176 text_pattern_eq(PG_FUNCTION_ARGS)
1177 {
1178         text       *arg1 = PG_GETARG_TEXT_P(0);
1179         text       *arg2 = PG_GETARG_TEXT_P(1);
1180         int                     result;
1181
1182         if (VARSIZE(arg1) != VARSIZE(arg2))
1183                 result = 1;
1184         else
1185                 result = internal_text_pattern_compare(arg1, arg2);
1186
1187         PG_FREE_IF_COPY(arg1, 0);
1188         PG_FREE_IF_COPY(arg2, 1);
1189
1190         PG_RETURN_BOOL(result == 0);
1191 }
1192
1193
1194 Datum
1195 text_pattern_ge(PG_FUNCTION_ARGS)
1196 {
1197         text       *arg1 = PG_GETARG_TEXT_P(0);
1198         text       *arg2 = PG_GETARG_TEXT_P(1);
1199         int                     result;
1200
1201         result = internal_text_pattern_compare(arg1, arg2);
1202
1203         PG_FREE_IF_COPY(arg1, 0);
1204         PG_FREE_IF_COPY(arg2, 1);
1205
1206         PG_RETURN_BOOL(result >= 0);
1207 }
1208
1209
1210 Datum
1211 text_pattern_gt(PG_FUNCTION_ARGS)
1212 {
1213         text       *arg1 = PG_GETARG_TEXT_P(0);
1214         text       *arg2 = PG_GETARG_TEXT_P(1);
1215         int                     result;
1216
1217         result = internal_text_pattern_compare(arg1, arg2);
1218
1219         PG_FREE_IF_COPY(arg1, 0);
1220         PG_FREE_IF_COPY(arg2, 1);
1221
1222         PG_RETURN_BOOL(result > 0);
1223 }
1224
1225
1226 Datum
1227 text_pattern_ne(PG_FUNCTION_ARGS)
1228 {
1229         text       *arg1 = PG_GETARG_TEXT_P(0);
1230         text       *arg2 = PG_GETARG_TEXT_P(1);
1231         int                     result;
1232
1233         if (VARSIZE(arg1) != VARSIZE(arg2))
1234                 result = 1;
1235         else
1236                 result = internal_text_pattern_compare(arg1, arg2);
1237
1238         PG_FREE_IF_COPY(arg1, 0);
1239         PG_FREE_IF_COPY(arg2, 1);
1240
1241         PG_RETURN_BOOL(result != 0);
1242 }
1243
1244
1245 Datum
1246 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1247 {
1248         text       *arg1 = PG_GETARG_TEXT_P(0);
1249         text       *arg2 = PG_GETARG_TEXT_P(1);
1250         int                     result;
1251
1252         result = internal_text_pattern_compare(arg1, arg2);
1253
1254         PG_FREE_IF_COPY(arg1, 0);
1255         PG_FREE_IF_COPY(arg2, 1);
1256
1257         PG_RETURN_INT32(result);
1258 }
1259
1260
1261 /*-------------------------------------------------------------
1262  * byteaoctetlen
1263  *
1264  * get the number of bytes contained in an instance of type 'bytea'
1265  *-------------------------------------------------------------
1266  */
1267 Datum
1268 byteaoctetlen(PG_FUNCTION_ARGS)
1269 {
1270         Datum           str = PG_GETARG_DATUM(0);
1271
1272         /* We need not detoast the input at all */
1273         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1274 }
1275
1276 /*
1277  * byteacat -
1278  *        takes two bytea* and returns a bytea* that is the concatenation of
1279  *        the two.
1280  *
1281  * Cloned from textcat and modified as required.
1282  */
1283 Datum
1284 byteacat(PG_FUNCTION_ARGS)
1285 {
1286         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1287         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1288         int                     len1,
1289                                 len2,
1290                                 len;
1291         bytea      *result;
1292         char       *ptr;
1293
1294         len1 = VARSIZE(t1) - VARHDRSZ;
1295         if (len1 < 0)
1296                 len1 = 0;
1297
1298         len2 = VARSIZE(t2) - VARHDRSZ;
1299         if (len2 < 0)
1300                 len2 = 0;
1301
1302         len = len1 + len2 + VARHDRSZ;
1303         result = (bytea *) palloc(len);
1304
1305         /* Set size of result string... */
1306         VARATT_SIZEP(result) = len;
1307
1308         /* Fill data field of result string... */
1309         ptr = VARDATA(result);
1310         if (len1 > 0)
1311                 memcpy(ptr, VARDATA(t1), len1);
1312         if (len2 > 0)
1313                 memcpy(ptr + len1, VARDATA(t2), len2);
1314
1315         PG_RETURN_BYTEA_P(result);
1316 }
1317
1318 #define PG_STR_GET_BYTEA(str_) \
1319         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1320 /*
1321  * bytea_substr()
1322  * Return a substring starting at the specified position.
1323  * Cloned from text_substr and modified as required.
1324  *
1325  * Input:
1326  *      - string
1327  *      - starting position (is one-based)
1328  *      - string length (optional)
1329  *
1330  * If the starting position is zero or less, then return from the start of the string
1331  * adjusting the length to be consistent with the "negative start" per SQL92.
1332  * If the length is less than zero, an ERROR is thrown. If no third argument
1333  * (length) is provided, the length to the end of the string is assumed.
1334  */
1335 Datum
1336 bytea_substr(PG_FUNCTION_ARGS)
1337 {
1338         int                     S = PG_GETARG_INT32(1); /* start position */
1339         int                     S1;                             /* adjusted start position */
1340         int                     L1;                             /* adjusted substring length */
1341
1342         S1 = Max(S, 1);
1343
1344         if (fcinfo->nargs == 2)
1345         {
1346                 /*
1347                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1348                  * the end of the string if we pass it a negative value for length.
1349                  */
1350                 L1 = -1;
1351         }
1352         else
1353         {
1354                 /* end position */
1355                 int                     E = S + PG_GETARG_INT32(2);
1356
1357                 /*
1358                  * A negative value for L is the only way for the end position to be
1359                  * before the start. SQL99 says to throw an error.
1360                  */
1361                 if (E < S)
1362                         ereport(ERROR,
1363                                         (errcode(ERRCODE_SUBSTRING_ERROR),
1364                                          errmsg("negative substring length not allowed")));
1365
1366                 /*
1367                  * A zero or negative value for the end position can happen if the
1368                  * start was negative or one. SQL99 says to return a zero-length
1369                  * string.
1370                  */
1371                 if (E < 1)
1372                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1373
1374                 L1 = E - S1;
1375         }
1376
1377         /*
1378          * If the start position is past the end of the string, SQL99 says to
1379          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
1380          * for us. Convert to zero-based starting position
1381          */
1382         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1383 }
1384
1385 /*
1386  * bytea_substr_no_len -
1387  *        Wrapper to avoid opr_sanity failure due to
1388  *        one function accepting a different number of args.
1389  */
1390 Datum
1391 bytea_substr_no_len(PG_FUNCTION_ARGS)
1392 {
1393         return bytea_substr(fcinfo);
1394 }
1395
1396 /*
1397  * byteapos -
1398  *        Return the position of the specified substring.
1399  *        Implements the SQL92 POSITION() function.
1400  * Cloned from textpos and modified as required.
1401  */
1402 Datum
1403 byteapos(PG_FUNCTION_ARGS)
1404 {
1405         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1406         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1407         int                     pos;
1408         int                     px,
1409                                 p;
1410         int                     len1,
1411                                 len2;
1412         char       *p1,
1413                            *p2;
1414
1415         if (VARSIZE(t2) <= VARHDRSZ)
1416                 PG_RETURN_INT32(1);             /* result for empty pattern */
1417
1418         len1 = VARSIZE(t1) - VARHDRSZ;
1419         len2 = VARSIZE(t2) - VARHDRSZ;
1420
1421         p1 = VARDATA(t1);
1422         p2 = VARDATA(t2);
1423
1424         pos = 0;
1425         px = (len1 - len2);
1426         for (p = 0; p <= px; p++)
1427         {
1428                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1429                 {
1430                         pos = p + 1;
1431                         break;
1432                 };
1433                 p1++;
1434         };
1435
1436         PG_RETURN_INT32(pos);
1437 }
1438
1439 /*-------------------------------------------------------------
1440  * byteaGetByte
1441  *
1442  * this routine treats "bytea" as an array of bytes.
1443  * It returns the Nth byte (a number between 0 and 255).
1444  *-------------------------------------------------------------
1445  */
1446 Datum
1447 byteaGetByte(PG_FUNCTION_ARGS)
1448 {
1449         bytea      *v = PG_GETARG_BYTEA_P(0);
1450         int32           n = PG_GETARG_INT32(1);
1451         int                     len;
1452         int                     byte;
1453
1454         len = VARSIZE(v) - VARHDRSZ;
1455
1456         if (n < 0 || n >= len)
1457                 ereport(ERROR,
1458                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1459                                  errmsg("index %d out of valid range, 0..%d",
1460                                                 n, len - 1)));
1461
1462         byte = ((unsigned char *) VARDATA(v))[n];
1463
1464         PG_RETURN_INT32(byte);
1465 }
1466
1467 /*-------------------------------------------------------------
1468  * byteaGetBit
1469  *
1470  * This routine treats a "bytea" type like an array of bits.
1471  * It returns the value of the Nth bit (0 or 1).
1472  *
1473  *-------------------------------------------------------------
1474  */
1475 Datum
1476 byteaGetBit(PG_FUNCTION_ARGS)
1477 {
1478         bytea      *v = PG_GETARG_BYTEA_P(0);
1479         int32           n = PG_GETARG_INT32(1);
1480         int                     byteNo,
1481                                 bitNo;
1482         int                     len;
1483         int                     byte;
1484
1485         len = VARSIZE(v) - VARHDRSZ;
1486
1487         if (n < 0 || n >= len * 8)
1488                 ereport(ERROR,
1489                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1490                                  errmsg("index %d out of valid range, 0..%d",
1491                                                 n, len * 8 - 1)));
1492
1493         byteNo = n / 8;
1494         bitNo = n % 8;
1495
1496         byte = ((unsigned char *) VARDATA(v))[byteNo];
1497
1498         if (byte & (1 << bitNo))
1499                 PG_RETURN_INT32(1);
1500         else
1501                 PG_RETURN_INT32(0);
1502 }
1503
1504 /*-------------------------------------------------------------
1505  * byteaSetByte
1506  *
1507  * Given an instance of type 'bytea' creates a new one with
1508  * the Nth byte set to the given value.
1509  *
1510  *-------------------------------------------------------------
1511  */
1512 Datum
1513 byteaSetByte(PG_FUNCTION_ARGS)
1514 {
1515         bytea      *v = PG_GETARG_BYTEA_P(0);
1516         int32           n = PG_GETARG_INT32(1);
1517         int32           newByte = PG_GETARG_INT32(2);
1518         int                     len;
1519         bytea      *res;
1520
1521         len = VARSIZE(v) - VARHDRSZ;
1522
1523         if (n < 0 || n >= len)
1524                 ereport(ERROR,
1525                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1526                                  errmsg("index %d out of valid range, 0..%d",
1527                                                 n, len - 1)));
1528
1529         /*
1530          * Make a copy of the original varlena.
1531          */
1532         res = (bytea *) palloc(VARSIZE(v));
1533         memcpy((char *) res, (char *) v, VARSIZE(v));
1534
1535         /*
1536          * Now set the byte.
1537          */
1538         ((unsigned char *) VARDATA(res))[n] = newByte;
1539
1540         PG_RETURN_BYTEA_P(res);
1541 }
1542
1543 /*-------------------------------------------------------------
1544  * byteaSetBit
1545  *
1546  * Given an instance of type 'bytea' creates a new one with
1547  * the Nth bit set to the given value.
1548  *
1549  *-------------------------------------------------------------
1550  */
1551 Datum
1552 byteaSetBit(PG_FUNCTION_ARGS)
1553 {
1554         bytea      *v = PG_GETARG_BYTEA_P(0);
1555         int32           n = PG_GETARG_INT32(1);
1556         int32           newBit = PG_GETARG_INT32(2);
1557         bytea      *res;
1558         int                     len;
1559         int                     oldByte,
1560                                 newByte;
1561         int                     byteNo,
1562                                 bitNo;
1563
1564         len = VARSIZE(v) - VARHDRSZ;
1565
1566         if (n < 0 || n >= len * 8)
1567                 ereport(ERROR,
1568                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1569                                  errmsg("index %d out of valid range, 0..%d",
1570                                                 n, len * 8 - 1)));
1571
1572         byteNo = n / 8;
1573         bitNo = n % 8;
1574
1575         /*
1576          * sanity check!
1577          */
1578         if (newBit != 0 && newBit != 1)
1579                 ereport(ERROR,
1580                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1581                                  errmsg("new bit must be 0 or 1")));
1582
1583         /*
1584          * Make a copy of the original varlena.
1585          */
1586         res = (bytea *) palloc(VARSIZE(v));
1587         memcpy((char *) res, (char *) v, VARSIZE(v));
1588
1589         /*
1590          * Update the byte.
1591          */
1592         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1593
1594         if (newBit == 0)
1595                 newByte = oldByte & (~(1 << bitNo));
1596         else
1597                 newByte = oldByte | (1 << bitNo);
1598
1599         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1600
1601         PG_RETURN_BYTEA_P(res);
1602 }
1603
1604
1605 /* text_name()
1606  * Converts a text type to a Name type.
1607  */
1608 Datum
1609 text_name(PG_FUNCTION_ARGS)
1610 {
1611         text       *s = PG_GETARG_TEXT_P(0);
1612         Name            result;
1613         int                     len;
1614
1615         len = VARSIZE(s) - VARHDRSZ;
1616
1617         /* Truncate oversize input */
1618         if (len >= NAMEDATALEN)
1619                 len = NAMEDATALEN - 1;
1620
1621 #ifdef STRINGDEBUG
1622         printf("text- convert string length %d (%d) ->%d\n",
1623                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1624 #endif
1625
1626         result = (Name) palloc(NAMEDATALEN);
1627         memcpy(NameStr(*result), VARDATA(s), len);
1628
1629         /* now null pad to full length... */
1630         while (len < NAMEDATALEN)
1631         {
1632                 *(NameStr(*result) + len) = '\0';
1633                 len++;
1634         }
1635
1636         PG_RETURN_NAME(result);
1637 }
1638
1639 /* name_text()
1640  * Converts a Name type to a text type.
1641  */
1642 Datum
1643 name_text(PG_FUNCTION_ARGS)
1644 {
1645         Name            s = PG_GETARG_NAME(0);
1646         text       *result;
1647         int                     len;
1648
1649         len = strlen(NameStr(*s));
1650
1651 #ifdef STRINGDEBUG
1652         printf("text- convert string length %d (%d) ->%d\n",
1653                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1654 #endif
1655
1656         result = palloc(VARHDRSZ + len);
1657         VARATT_SIZEP(result) = VARHDRSZ + len;
1658         memcpy(VARDATA(result), NameStr(*s), len);
1659
1660         PG_RETURN_TEXT_P(result);
1661 }
1662
1663
1664 /*
1665  * textToQualifiedNameList - convert a text object to list of names
1666  *
1667  * This implements the input parsing needed by nextval() and other
1668  * functions that take a text parameter representing a qualified name.
1669  * We split the name at dots, downcase if not double-quoted, and
1670  * truncate names if they're too long.
1671  */
1672 List *
1673 textToQualifiedNameList(text *textval)
1674 {
1675         char       *rawname;
1676         List       *result = NIL;
1677         List       *namelist;
1678         ListCell   *l;
1679
1680         /* Convert to C string (handles possible detoasting). */
1681         /* Note we rely on being able to modify rawname below. */
1682         rawname = DatumGetCString(DirectFunctionCall1(textout,
1683                                                                                                   PointerGetDatum(textval)));
1684
1685         if (!SplitIdentifierString(rawname, '.', &namelist))
1686                 ereport(ERROR,
1687                                 (errcode(ERRCODE_INVALID_NAME),
1688                                  errmsg("invalid name syntax")));
1689
1690         if (namelist == NIL)
1691                 ereport(ERROR,
1692                                 (errcode(ERRCODE_INVALID_NAME),
1693                                  errmsg("invalid name syntax")));
1694
1695         foreach(l, namelist)
1696         {
1697                 char       *curname = (char *) lfirst(l);
1698
1699                 result = lappend(result, makeString(pstrdup(curname)));
1700         }
1701
1702         pfree(rawname);
1703         list_free(namelist);
1704
1705         return result;
1706 }
1707
1708 /*
1709  * SplitIdentifierString --- parse a string containing identifiers
1710  *
1711  * This is the guts of textToQualifiedNameList, and is exported for use in
1712  * other situations such as parsing GUC variables.      In the GUC case, it's
1713  * important to avoid memory leaks, so the API is designed to minimize the
1714  * amount of stuff that needs to be allocated and freed.
1715  *
1716  * Inputs:
1717  *      rawstring: the input string; must be overwritable!      On return, it's
1718  *                         been modified to contain the separated identifiers.
1719  *      separator: the separator punctuation expected between identifiers
1720  *                         (typically '.' or ',').      Whitespace may also appear around
1721  *                         identifiers.
1722  * Outputs:
1723  *      namelist: filled with a palloc'd list of pointers to identifiers within
1724  *                        rawstring.  Caller should list_free() this even on error return.
1725  *
1726  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1727  *
1728  * Note that an empty string is considered okay here, though not in
1729  * textToQualifiedNameList.
1730  */
1731 bool
1732 SplitIdentifierString(char *rawstring, char separator,
1733                                           List **namelist)
1734 {
1735         char       *nextp = rawstring;
1736         bool            done = false;
1737
1738         *namelist = NIL;
1739
1740         while (isspace((unsigned char) *nextp))
1741                 nextp++;                                /* skip leading whitespace */
1742
1743         if (*nextp == '\0')
1744                 return true;                    /* allow empty string */
1745
1746         /* At the top of the loop, we are at start of a new identifier. */
1747         do
1748         {
1749                 char       *curname;
1750                 char       *endp;
1751
1752                 if (*nextp == '\"')
1753                 {
1754                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1755                         curname = nextp + 1;
1756                         for (;;)
1757                         {
1758                                 endp = strchr(nextp + 1, '\"');
1759                                 if (endp == NULL)
1760                                         return false;           /* mismatched quotes */
1761                                 if (endp[1] != '\"')
1762                                         break;          /* found end of quoted name */
1763                                 /* Collapse adjacent quotes into one quote, and look again */
1764                                 memmove(endp, endp + 1, strlen(endp));
1765                                 nextp = endp;
1766                         }
1767                         /* endp now points at the terminating quote */
1768                         nextp = endp + 1;
1769                 }
1770                 else
1771                 {
1772                         /* Unquoted name --- extends to separator or whitespace */
1773                         char       *downname;
1774                         int                     len;
1775
1776                         curname = nextp;
1777                         while (*nextp && *nextp != separator &&
1778                                    !isspace((unsigned char) *nextp))
1779                                 nextp++;
1780                         endp = nextp;
1781                         if (curname == nextp)
1782                                 return false;   /* empty unquoted name not allowed */
1783
1784                         /*
1785                          * Downcase the identifier, using same code as main lexer does.
1786                          *
1787                          * XXX because we want to overwrite the input in-place, we cannot
1788                          * support a downcasing transformation that increases the string
1789                          * length.      This is not a problem given the current implementation
1790                          * of downcase_truncate_identifier, but we'll probably have to do
1791                          * something about this someday.
1792                          */
1793                         len = endp - curname;
1794                         downname = downcase_truncate_identifier(curname, len, false);
1795                         Assert(strlen(downname) <= len);
1796                         strncpy(curname, downname, len);
1797                         pfree(downname);
1798                 }
1799
1800                 while (isspace((unsigned char) *nextp))
1801                         nextp++;                        /* skip trailing whitespace */
1802
1803                 if (*nextp == separator)
1804                 {
1805                         nextp++;
1806                         while (isspace((unsigned char) *nextp))
1807                                 nextp++;                /* skip leading whitespace for next */
1808                         /* we expect another name, so done remains false */
1809                 }
1810                 else if (*nextp == '\0')
1811                         done = true;
1812                 else
1813                         return false;           /* invalid syntax */
1814
1815                 /* Now safe to overwrite separator with a null */
1816                 *endp = '\0';
1817
1818                 /* Truncate name if it's overlength */
1819                 truncate_identifier(curname, strlen(curname), false);
1820
1821                 /*
1822                  * Finished isolating current name --- add it to list
1823                  */
1824                 *namelist = lappend(*namelist, curname);
1825
1826                 /* Loop back if we didn't reach end of string */
1827         } while (!done);
1828
1829         return true;
1830 }
1831
1832
1833 /*****************************************************************************
1834  *      Comparison Functions used for bytea
1835  *
1836  * Note: btree indexes need these routines not to leak memory; therefore,
1837  * be careful to free working copies of toasted datums.  Most places don't
1838  * need to be so careful.
1839  *****************************************************************************/
1840
1841 Datum
1842 byteaeq(PG_FUNCTION_ARGS)
1843 {
1844         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1845         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1846         int                     len1,
1847                                 len2;
1848         bool            result;
1849
1850         len1 = VARSIZE(arg1) - VARHDRSZ;
1851         len2 = VARSIZE(arg2) - VARHDRSZ;
1852
1853         /* fast path for different-length inputs */
1854         if (len1 != len2)
1855                 result = false;
1856         else
1857                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1858
1859         PG_FREE_IF_COPY(arg1, 0);
1860         PG_FREE_IF_COPY(arg2, 1);
1861
1862         PG_RETURN_BOOL(result);
1863 }
1864
1865 Datum
1866 byteane(PG_FUNCTION_ARGS)
1867 {
1868         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1869         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1870         int                     len1,
1871                                 len2;
1872         bool            result;
1873
1874         len1 = VARSIZE(arg1) - VARHDRSZ;
1875         len2 = VARSIZE(arg2) - VARHDRSZ;
1876
1877         /* fast path for different-length inputs */
1878         if (len1 != len2)
1879                 result = true;
1880         else
1881                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1882
1883         PG_FREE_IF_COPY(arg1, 0);
1884         PG_FREE_IF_COPY(arg2, 1);
1885
1886         PG_RETURN_BOOL(result);
1887 }
1888
1889 Datum
1890 bytealt(PG_FUNCTION_ARGS)
1891 {
1892         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1893         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1894         int                     len1,
1895                                 len2;
1896         int                     cmp;
1897
1898         len1 = VARSIZE(arg1) - VARHDRSZ;
1899         len2 = VARSIZE(arg2) - VARHDRSZ;
1900
1901         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1902
1903         PG_FREE_IF_COPY(arg1, 0);
1904         PG_FREE_IF_COPY(arg2, 1);
1905
1906         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1907 }
1908
1909 Datum
1910 byteale(PG_FUNCTION_ARGS)
1911 {
1912         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1913         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1914         int                     len1,
1915                                 len2;
1916         int                     cmp;
1917
1918         len1 = VARSIZE(arg1) - VARHDRSZ;
1919         len2 = VARSIZE(arg2) - VARHDRSZ;
1920
1921         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1922
1923         PG_FREE_IF_COPY(arg1, 0);
1924         PG_FREE_IF_COPY(arg2, 1);
1925
1926         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1927 }
1928
1929 Datum
1930 byteagt(PG_FUNCTION_ARGS)
1931 {
1932         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1933         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1934         int                     len1,
1935                                 len2;
1936         int                     cmp;
1937
1938         len1 = VARSIZE(arg1) - VARHDRSZ;
1939         len2 = VARSIZE(arg2) - VARHDRSZ;
1940
1941         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1942
1943         PG_FREE_IF_COPY(arg1, 0);
1944         PG_FREE_IF_COPY(arg2, 1);
1945
1946         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1947 }
1948
1949 Datum
1950 byteage(PG_FUNCTION_ARGS)
1951 {
1952         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1953         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1954         int                     len1,
1955                                 len2;
1956         int                     cmp;
1957
1958         len1 = VARSIZE(arg1) - VARHDRSZ;
1959         len2 = VARSIZE(arg2) - VARHDRSZ;
1960
1961         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1962
1963         PG_FREE_IF_COPY(arg1, 0);
1964         PG_FREE_IF_COPY(arg2, 1);
1965
1966         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1967 }
1968
1969 Datum
1970 byteacmp(PG_FUNCTION_ARGS)
1971 {
1972         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1973         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1974         int                     len1,
1975                                 len2;
1976         int                     cmp;
1977
1978         len1 = VARSIZE(arg1) - VARHDRSZ;
1979         len2 = VARSIZE(arg2) - VARHDRSZ;
1980
1981         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1982         if ((cmp == 0) && (len1 != len2))
1983                 cmp = (len1 < len2) ? -1 : 1;
1984
1985         PG_FREE_IF_COPY(arg1, 0);
1986         PG_FREE_IF_COPY(arg2, 1);
1987
1988         PG_RETURN_INT32(cmp);
1989 }
1990
1991 /*
1992  * appendStringInfoText
1993  *
1994  * Append a text to str.
1995  * Like appendStringInfoString(str, PG_TEXT_GET_STR(s)) but faster.
1996  */
1997 static void
1998 appendStringInfoText(StringInfo str, const text *t)
1999 {
2000         appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
2001 }
2002
2003 /*
2004  * replace_text
2005  * replace all occurrences of 'old_sub_str' in 'orig_str'
2006  * with 'new_sub_str' to form 'new_str'
2007  *
2008  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2009  * otherwise returns 'new_str'
2010  */
2011 Datum
2012 replace_text(PG_FUNCTION_ARGS)
2013 {
2014         text       *src_text = PG_GETARG_TEXT_P(0);
2015         text       *from_sub_text = PG_GETARG_TEXT_P(1);
2016         text       *to_sub_text = PG_GETARG_TEXT_P(2);
2017         int                     src_text_len = TEXTLEN(src_text);
2018         int                     from_sub_text_len = TEXTLEN(from_sub_text);
2019         text       *left_text;
2020         text       *right_text;
2021         text       *buf_text;
2022         text       *ret_text;
2023         int                     curr_posn;
2024         StringInfo      str;
2025
2026         if (src_text_len == 0 || from_sub_text_len == 0)
2027                 PG_RETURN_TEXT_P(src_text);
2028
2029         curr_posn = TEXTPOS(src_text, from_sub_text);
2030
2031         /* When the from_sub_text is not found, there is nothing to do. */
2032         if (curr_posn == 0)
2033                 PG_RETURN_TEXT_P(src_text);
2034
2035         str = makeStringInfo();
2036         buf_text = src_text;
2037
2038         while (curr_posn > 0)
2039         {
2040                 left_text = text_substring(PointerGetDatum(buf_text),
2041                                                                    1, curr_posn - 1, false);
2042                 right_text = text_substring(PointerGetDatum(buf_text),
2043                                                                         curr_posn + from_sub_text_len, -1, true);
2044
2045                 appendStringInfoText(str, left_text);
2046                 appendStringInfoText(str, to_sub_text);
2047
2048                 if (buf_text != src_text)
2049                         pfree(buf_text);
2050                 pfree(left_text);
2051                 buf_text = right_text;
2052                 curr_posn = TEXTPOS(buf_text, from_sub_text);
2053         }
2054
2055         appendStringInfoText(str, buf_text);
2056         if (buf_text != src_text)
2057                 pfree(buf_text);
2058
2059         ret_text = PG_STR_GET_TEXT(str->data);
2060         pfree(str->data);
2061         pfree(str);
2062
2063         PG_RETURN_TEXT_P(ret_text);
2064 }
2065
2066 /*
2067  * check_replace_text_has_escape_char
2068  *
2069  * check whether replace_text contains escape char.
2070  */
2071 static bool
2072 check_replace_text_has_escape_char(const text *replace_text)
2073 {
2074         const char *p = VARDATA(replace_text);
2075         const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2076
2077         if (pg_database_encoding_max_length() == 1)
2078         {
2079                 for (; p < p_end; p++)
2080                 {
2081                         if (*p == '\\')
2082                                 return true;
2083                 }
2084         }
2085         else
2086         {
2087                 for (; p < p_end; p += pg_mblen(p))
2088                 {
2089                         if (*p == '\\')
2090                                 return true;
2091                 }
2092         }
2093
2094         return false;
2095 }
2096
2097 /*
2098  * appendStringInfoRegexpSubstr
2099  *
2100  * Append replace_text to str, substituting regexp back references for
2101  * \n escapes.
2102  */
2103 static void
2104 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2105                                                          regmatch_t *pmatch, text *src_text)
2106 {
2107         const char *p = VARDATA(replace_text);
2108         const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2109         int                     eml = pg_database_encoding_max_length();
2110
2111         for (;;)
2112         {
2113                 const char *chunk_start = p;
2114                 int                     so;
2115                 int                     eo;
2116
2117                 /* Find next escape char. */
2118                 if (eml == 1)
2119                 {
2120                         for (; p < p_end && *p != '\\'; p++)
2121                                  /* nothing */ ;
2122                 }
2123                 else
2124                 {
2125                         for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2126                                  /* nothing */ ;
2127                 }
2128
2129                 /* Copy the text we just scanned over, if any. */
2130                 if (p > chunk_start)
2131                         appendBinaryStringInfo(str, chunk_start, p - chunk_start);
2132
2133                 /* Done if at end of string, else advance over escape char. */
2134                 if (p >= p_end)
2135                         break;
2136                 p++;
2137
2138                 if (p >= p_end)
2139                 {
2140                         /* Escape at very end of input.  Treat same as unexpected char */
2141                         appendStringInfoChar(str, '\\');
2142                         break;
2143                 }
2144
2145                 if (*p >= '1' && *p <= '9')
2146                 {
2147                         /* Use the back reference of regexp. */
2148                         int                     idx = *p - '0';
2149
2150                         so = pmatch[idx].rm_so;
2151                         eo = pmatch[idx].rm_eo;
2152                         p++;
2153                 }
2154                 else if (*p == '&')
2155                 {
2156                         /* Use the entire matched string. */
2157                         so = pmatch[0].rm_so;
2158                         eo = pmatch[0].rm_eo;
2159                         p++;
2160                 }
2161                 else if (*p == '\\')
2162                 {
2163                         /* \\ means transfer one \ to output. */
2164                         appendStringInfoChar(str, '\\');
2165                         p++;
2166                         continue;
2167                 }
2168                 else
2169                 {
2170                         /*
2171                          * If escape char is not followed by any expected char, just treat
2172                          * it as ordinary data to copy.  (XXX would it be better to throw
2173                          * an error?)
2174                          */
2175                         appendStringInfoChar(str, '\\');
2176                         continue;
2177                 }
2178
2179                 if (so != -1 && eo != -1)
2180                 {
2181                         /*
2182                          * Copy the text that is back reference of regexp.      Because so and
2183                          * eo are counted in characters not bytes, it's easiest to use
2184                          * text_substring to pull out the correct chunk of text.
2185                          */
2186                         text       *append_text;
2187
2188                         append_text = text_substring(PointerGetDatum(src_text),
2189                                                                                  so + 1, (eo - so), false);
2190                         appendStringInfoText(str, append_text);
2191                         pfree(append_text);
2192                 }
2193         }
2194 }
2195
2196 #define REGEXP_REPLACE_BACKREF_CNT              10
2197
2198 /*
2199  * replace_text_regexp
2200  *
2201  * replace text that matches to regexp in src_text to replace_text.
2202  *
2203  * Note: to avoid having to include regex.h in builtins.h, we declare
2204  * the regexp argument as void *, but really it's regex_t *.
2205  */
2206 text *
2207 replace_text_regexp(text *src_text, void *regexp,
2208                                         text *replace_text, bool glob)
2209 {
2210         text       *ret_text;
2211         regex_t    *re = (regex_t *) regexp;
2212         int                     src_text_len = VARSIZE(src_text) - VARHDRSZ;
2213         StringInfo      str = makeStringInfo();
2214         int                     regexec_result;
2215         regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
2216         pg_wchar   *data;
2217         size_t          data_len;
2218         int                     search_start;
2219         int                     data_pos;
2220         bool            have_escape;
2221
2222         /* Convert data string to wide characters. */
2223         data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2224         data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
2225
2226         /* Check whether replace_text has escape char. */
2227         have_escape = check_replace_text_has_escape_char(replace_text);
2228
2229         for (search_start = data_pos = 0; search_start <= data_len;)
2230         {
2231                 regexec_result = pg_regexec(re,
2232                                                                         data,
2233                                                                         data_len,
2234                                                                         search_start,
2235                                                                         NULL,           /* no details */
2236                                                                         REGEXP_REPLACE_BACKREF_CNT,
2237                                                                         pmatch,
2238                                                                         0);
2239
2240                 if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
2241                 {
2242                         char            errMsg[100];
2243
2244                         /* re failed??? */
2245                         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2246                         ereport(ERROR,
2247                                         (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2248                                          errmsg("regular expression failed: %s", errMsg)));
2249                 }
2250
2251                 if (regexec_result == REG_NOMATCH)
2252                         break;
2253
2254                 /*
2255                  * Copy the text to the left of the match position.  Because we are
2256                  * working with character not byte indexes, it's easiest to use
2257                  * text_substring to pull out the needed data.
2258                  */
2259                 if (pmatch[0].rm_so - data_pos > 0)
2260                 {
2261                         text       *left_text;
2262
2263                         left_text = text_substring(PointerGetDatum(src_text),
2264                                                                            data_pos + 1,
2265                                                                            pmatch[0].rm_so - data_pos,
2266                                                                            false);
2267                         appendStringInfoText(str, left_text);
2268                         pfree(left_text);
2269                 }
2270
2271                 /*
2272                  * Copy the replace_text. Process back references when the
2273                  * replace_text has escape characters.
2274                  */
2275                 if (have_escape)
2276                         appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
2277                 else
2278                         appendStringInfoText(str, replace_text);
2279
2280                 search_start = data_pos = pmatch[0].rm_eo;
2281
2282                 /*
2283                  * When global option is off, replace the first instance only.
2284                  */
2285                 if (!glob)
2286                         break;
2287
2288                 /*
2289                  * Search from next character when the matching text is zero width.
2290                  */
2291                 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2292                         search_start++;
2293         }
2294
2295         /*
2296          * Copy the text to the right of the last match.
2297          */
2298         if (data_pos < data_len)
2299         {
2300                 text       *right_text;
2301
2302                 right_text = text_substring(PointerGetDatum(src_text),
2303                                                                         data_pos + 1, -1, true);
2304                 appendStringInfoText(str, right_text);
2305                 pfree(right_text);
2306         }
2307
2308         ret_text = PG_STR_GET_TEXT(str->data);
2309         pfree(str->data);
2310         pfree(str);
2311         pfree(data);
2312
2313         return ret_text;
2314 }
2315
2316 /*
2317  * split_text
2318  * parse input string
2319  * return ord item (1 based)
2320  * based on provided field separator
2321  */
2322 Datum
2323 split_text(PG_FUNCTION_ARGS)
2324 {
2325         text       *inputstring = PG_GETARG_TEXT_P(0);
2326         text       *fldsep = PG_GETARG_TEXT_P(1);
2327         int                     fldnum = PG_GETARG_INT32(2);
2328         int                     inputstring_len = TEXTLEN(inputstring);
2329         int                     fldsep_len = TEXTLEN(fldsep);
2330         int                     start_posn;
2331         int                     end_posn;
2332         text       *result_text;
2333
2334         /* field number is 1 based */
2335         if (fldnum < 1)
2336                 ereport(ERROR,
2337                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2338                                  errmsg("field position must be greater than zero")));
2339
2340         /* return empty string for empty input string */
2341         if (inputstring_len < 1)
2342                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2343
2344         /* empty field separator */
2345         if (fldsep_len < 1)
2346         {
2347                 /* if first field, return input string, else empty string */
2348                 if (fldnum == 1)
2349                         PG_RETURN_TEXT_P(inputstring);
2350                 else
2351                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2352         }
2353
2354         start_posn = text_position(inputstring, fldsep, fldnum - 1);
2355         end_posn = text_position(inputstring, fldsep, fldnum);
2356
2357         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2358         {
2359                 /* if first field, return input string, else empty string */
2360                 if (fldnum == 1)
2361                         PG_RETURN_TEXT_P(inputstring);
2362                 else
2363                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2364         }
2365         else if (start_posn == 0)
2366         {
2367                 /* first field requested */
2368                 result_text = LEFT(inputstring, fldsep);
2369                 PG_RETURN_TEXT_P(result_text);
2370         }
2371         else if (end_posn == 0)
2372         {
2373                 /* last field requested */
2374                 result_text = text_substring(PointerGetDatum(inputstring),
2375                                                                          start_posn + fldsep_len,
2376                                                                          -1, true);
2377                 PG_RETURN_TEXT_P(result_text);
2378         }
2379         else
2380         {
2381                 /* interior field requested */
2382                 result_text = text_substring(PointerGetDatum(inputstring),
2383                                                                          start_posn + fldsep_len,
2384                                                                          end_posn - start_posn - fldsep_len,
2385                                                                          false);
2386                 PG_RETURN_TEXT_P(result_text);
2387         }
2388 }
2389
2390 /*
2391  * text_to_array
2392  * parse input string
2393  * return text array of elements
2394  * based on provided field separator
2395  */
2396 Datum
2397 text_to_array(PG_FUNCTION_ARGS)
2398 {
2399         text       *inputstring = PG_GETARG_TEXT_P(0);
2400         text       *fldsep = PG_GETARG_TEXT_P(1);
2401         int                     inputstring_len = TEXTLEN(inputstring);
2402         int                     fldsep_len = TEXTLEN(fldsep);
2403         int                     fldnum;
2404         int                     start_posn;
2405         int                     end_posn;
2406         text       *result_text;
2407         ArrayBuildState *astate = NULL;
2408
2409         /* return NULL for empty input string */
2410         if (inputstring_len < 1)
2411                 PG_RETURN_NULL();
2412
2413         /*
2414          * empty field separator return one element, 1D, array using the input
2415          * string
2416          */
2417         if (fldsep_len < 1)
2418                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2419                                                                                    CStringGetDatum(inputstring), 1));
2420
2421         /* start with end position holding the initial start position */
2422         end_posn = 0;
2423         for (fldnum = 1;; fldnum++) /* field number is 1 based */
2424         {
2425                 Datum           dvalue;
2426                 bool            disnull = false;
2427
2428                 start_posn = end_posn;
2429                 end_posn = text_position(inputstring, fldsep, fldnum);
2430
2431                 if ((start_posn == 0) && (end_posn == 0))               /* fldsep not found */
2432                 {
2433                         if (fldnum == 1)
2434                         {
2435                                 /*
2436                                  * first element return one element, 1D, array using the input
2437                                  * string
2438                                  */
2439                                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2440                                                                                    CStringGetDatum(inputstring), 1));
2441                         }
2442                         else
2443                         {
2444                                 /* otherwise create array and exit */
2445                                 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2446                                                                                                           CurrentMemoryContext));
2447                         }
2448                 }
2449                 else if (start_posn == 0)
2450                 {
2451                         /* first field requested */
2452                         result_text = LEFT(inputstring, fldsep);
2453                 }
2454                 else if (end_posn == 0)
2455                 {
2456                         /* last field requested */
2457                         result_text = text_substring(PointerGetDatum(inputstring),
2458                                                                                  start_posn + fldsep_len,
2459                                                                                  -1, true);
2460                 }
2461                 else
2462                 {
2463                         /* interior field requested */
2464                         result_text = text_substring(PointerGetDatum(inputstring),
2465                                                                                  start_posn + fldsep_len,
2466                                                                                  end_posn - start_posn - fldsep_len,
2467                                                                                  false);
2468                 }
2469
2470                 /* stash away current value */
2471                 dvalue = PointerGetDatum(result_text);
2472                 astate = accumArrayResult(astate, dvalue,
2473                                                                   disnull, TEXTOID,
2474                                                                   CurrentMemoryContext);
2475         }
2476
2477         /* never reached -- keep compiler quiet */
2478         PG_RETURN_NULL();
2479 }
2480
2481 /*
2482  * array_to_text
2483  * concatenate Cstring representation of input array elements
2484  * using provided field separator
2485  */
2486 Datum
2487 array_to_text(PG_FUNCTION_ARGS)
2488 {
2489         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2490         char       *fldsep = PG_TEXTARG_GET_STR(1);
2491         int                     nitems,
2492                            *dims,
2493                                 ndims;
2494         Oid                     element_type;
2495         int                     typlen;
2496         bool            typbyval;
2497         char            typalign;
2498         StringInfo      result_str = makeStringInfo();
2499         bool            printed = false;
2500         char       *p;
2501         bits8      *bitmap;
2502         int                     bitmask;
2503         int                     i;
2504         ArrayMetaState *my_extra;
2505
2506         ndims = ARR_NDIM(v);
2507         dims = ARR_DIMS(v);
2508         nitems = ArrayGetNItems(ndims, dims);
2509
2510         /* if there are no elements, return an empty string */
2511         if (nitems == 0)
2512                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2513
2514         element_type = ARR_ELEMTYPE(v);
2515
2516         /*
2517          * We arrange to look up info about element type, including its output
2518          * conversion proc, only once per series of calls, assuming the element
2519          * type doesn't change underneath us.
2520          */
2521         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2522         if (my_extra == NULL)
2523         {
2524                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2525                                                                                                           sizeof(ArrayMetaState));
2526                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2527                 my_extra->element_type = ~element_type;
2528         }
2529
2530         if (my_extra->element_type != element_type)
2531         {
2532                 /*
2533                  * Get info about element type, including its output conversion proc
2534                  */
2535                 get_type_io_data(element_type, IOFunc_output,
2536                                                  &my_extra->typlen, &my_extra->typbyval,
2537                                                  &my_extra->typalign, &my_extra->typdelim,
2538                                                  &my_extra->typioparam, &my_extra->typiofunc);
2539                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2540                                           fcinfo->flinfo->fn_mcxt);
2541                 my_extra->element_type = element_type;
2542         }
2543         typlen = my_extra->typlen;
2544         typbyval = my_extra->typbyval;
2545         typalign = my_extra->typalign;
2546
2547         p = ARR_DATA_PTR(v);
2548         bitmap = ARR_NULLBITMAP(v);
2549         bitmask = 1;
2550
2551         for (i = 0; i < nitems; i++)
2552         {
2553                 Datum           itemvalue;
2554                 char       *value;
2555
2556                 /* Get source element, checking for NULL */
2557                 if (bitmap && (*bitmap & bitmask) == 0)
2558                 {
2559                         /* we ignore nulls */
2560                 }
2561                 else
2562                 {
2563                         itemvalue = fetch_att(p, typbyval, typlen);
2564
2565                         value = DatumGetCString(FunctionCall1(&my_extra->proc,
2566                                                                                                   itemvalue));
2567
2568                         if (printed)
2569                                 appendStringInfo(result_str, "%s%s", fldsep, value);
2570                         else
2571                                 appendStringInfoString(result_str, value);
2572                         printed = true;
2573
2574                         p = att_addlength(p, typlen, PointerGetDatum(p));
2575                         p = (char *) att_align(p, typalign);
2576                 }
2577
2578                 /* advance bitmap pointer if any */
2579                 if (bitmap)
2580                 {
2581                         bitmask <<= 1;
2582                         if (bitmask == 0x100)
2583                         {
2584                                 bitmap++;
2585                                 bitmask = 1;
2586                         }
2587                 }
2588         }
2589
2590         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2591 }
2592
2593 #define HEXBASE 16
2594 /*
2595  * Convert a int32 to a string containing a base 16 (hex) representation of
2596  * the number.
2597  */
2598 Datum
2599 to_hex32(PG_FUNCTION_ARGS)
2600 {
2601         uint32          value = (uint32) PG_GETARG_INT32(0);
2602         text       *result_text;
2603         char       *ptr;
2604         const char *digits = "0123456789abcdef";
2605         char            buf[32];                /* bigger than needed, but reasonable */
2606
2607         ptr = buf + sizeof(buf) - 1;
2608         *ptr = '\0';
2609
2610         do
2611         {
2612                 *--ptr = digits[value % HEXBASE];
2613                 value /= HEXBASE;
2614         } while (ptr > buf && value);
2615
2616         result_text = PG_STR_GET_TEXT(ptr);
2617         PG_RETURN_TEXT_P(result_text);
2618 }
2619
2620 /*
2621  * Convert a int64 to a string containing a base 16 (hex) representation of
2622  * the number.
2623  */
2624 Datum
2625 to_hex64(PG_FUNCTION_ARGS)
2626 {
2627         uint64          value = (uint64) PG_GETARG_INT64(0);
2628         text       *result_text;
2629         char       *ptr;
2630         const char *digits = "0123456789abcdef";
2631         char            buf[32];                /* bigger than needed, but reasonable */
2632
2633         ptr = buf + sizeof(buf) - 1;
2634         *ptr = '\0';
2635
2636         do
2637         {
2638                 *--ptr = digits[value % HEXBASE];
2639                 value /= HEXBASE;
2640         } while (ptr > buf && value);
2641
2642         result_text = PG_STR_GET_TEXT(ptr);
2643         PG_RETURN_TEXT_P(result_text);
2644 }
2645
2646 /*
2647  * Create an md5 hash of a text string and return it as hex
2648  *
2649  * md5 produces a 16 byte (128 bit) hash; double it for hex
2650  */
2651 #define MD5_HASH_LEN  32
2652
2653 Datum
2654 md5_text(PG_FUNCTION_ARGS)
2655 {
2656         text       *in_text = PG_GETARG_TEXT_P(0);
2657         size_t          len;
2658         char            hexsum[MD5_HASH_LEN + 1];
2659         text       *result_text;
2660
2661         /* Calculate the length of the buffer using varlena metadata */
2662         len = VARSIZE(in_text) - VARHDRSZ;
2663
2664         /* get the hash result */
2665         if (pg_md5_hash(VARDATA(in_text), len, hexsum) == false)
2666                 ereport(ERROR,
2667                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2668                                  errmsg("out of memory")));
2669
2670         /* convert to text and return it */
2671         result_text = PG_STR_GET_TEXT(hexsum);
2672         PG_RETURN_TEXT_P(result_text);
2673 }
2674
2675 /*
2676  * Create an md5 hash of a bytea field and return it as a hex string:
2677  * 16-byte md5 digest is represented in 32 hex characters.
2678  */
2679 Datum
2680 md5_bytea(PG_FUNCTION_ARGS)
2681 {
2682         bytea      *in = PG_GETARG_BYTEA_P(0);
2683         size_t          len;
2684         char            hexsum[MD5_HASH_LEN + 1];
2685         text       *result_text;
2686
2687         len = VARSIZE(in) - VARHDRSZ;
2688         if (pg_md5_hash(VARDATA(in), len, hexsum) == false)
2689                 ereport(ERROR,
2690                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2691                                  errmsg("out of memory")));
2692
2693         result_text = PG_STR_GET_TEXT(hexsum);
2694         PG_RETURN_TEXT_P(result_text);
2695 }
2696
2697 /*
2698  * Return the size of a datum, possibly compressed
2699  *
2700  * Works on any data type
2701  */
2702 Datum
2703 pg_column_size(PG_FUNCTION_ARGS)
2704 {
2705         Datum           value = PG_GETARG_DATUM(0);
2706         int32           result;
2707         int                     typlen;
2708
2709         /* On first call, get the input type's typlen, and save at *fn_extra */
2710         if (fcinfo->flinfo->fn_extra == NULL)
2711         {
2712                 /* Lookup the datatype of the supplied argument */
2713                 Oid                     argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
2714
2715                 typlen = get_typlen(argtypeid);
2716                 if (typlen == 0)                /* should not happen */
2717                         elog(ERROR, "cache lookup failed for type %u", argtypeid);
2718
2719                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2720                                                                                                           sizeof(int));
2721                 *((int *) fcinfo->flinfo->fn_extra) = typlen;
2722         }
2723         else
2724                 typlen = *((int *) fcinfo->flinfo->fn_extra);
2725
2726         if (typlen == -1)
2727         {
2728                 /* varlena type, possibly toasted */
2729                 result = toast_datum_size(value);
2730         }
2731         else if (typlen == -2)
2732         {
2733                 /* cstring */
2734                 result = strlen(DatumGetCString(value)) + 1;
2735         }
2736         else
2737         {
2738                 /* ordinary fixed-width type */
2739                 result = typlen;
2740         }
2741
2742         PG_RETURN_INT32(result);
2743 }