]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c
Remove unnecessary parentheses in assignments.
[postgresql] / src / backend / utils / adt / varlena.c
1 /*-------------------------------------------------------------------------
2  *
3  * varlena.c
4  *        Functions for the variable-length built-in types.
5  *
6  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.129 2005/07/21 04:41:43 momjian Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include <ctype.h>
18
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/crypt.h"
23 #include "libpq/pqformat.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "utils/array.h"
28 #include "utils/builtins.h"
29 #include "utils/lsyscache.h"
30 #include "utils/pg_locale.h"
31 #include "regex/regex.h"
32
33
/*
 * Values of type "unknown" are handled through a plain varlena pointer.
 * The macros below are the generic detoast / argument-fetch / return
 * macros with the result cast to (unknown *).
 */
typedef struct varlena unknown;

#define DatumGetUnknownP(datum)			((unknown *) PG_DETOAST_DATUM(datum))
#define DatumGetUnknownPCopy(datum)		((unknown *) PG_DETOAST_DATUM_COPY(datum))
#define PG_GETARG_UNKNOWN_P(argno)		DatumGetUnknownP(PG_GETARG_DATUM(argno))
#define PG_GETARG_UNKNOWN_P_COPY(argno) DatumGetUnknownPCopy(PG_GETARG_DATUM(argno))
#define PG_RETURN_UNKNOWN_P(ptr)		PG_RETURN_POINTER(ptr)
41
/* function argument number -> C string, by calling textout */
#define PG_TEXTARG_GET_STR(argno) \
	DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(argno)))

/* text pointer -> C string, by calling textout */
#define PG_TEXT_GET_STR(txt) \
	DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(txt)))

/* C string -> text pointer, by calling textin */
#define PG_STR_GET_TEXT(cstr) \
	DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstr)))

/* length of a text value as computed by text_length() */
#define TEXTLEN(txt) \
	text_length(PointerGetDatum(txt))

/* position of the first match of "needle" within "haystack" (0 if none) */
#define TEXTPOS(haystack, needle) \
	text_position(haystack, needle, 1)

/*
 * Leading part of "haystack" that precedes the first match of "needle":
 * a substring starting at character 1 of length TEXTPOS(...) - 1.
 * Note that "haystack" is evaluated twice.
 */
#define LEFT(haystack, needle) \
	text_substring(PointerGetDatum(haystack), \
				   1, \
				   TEXTPOS(haystack, needle) - 1, false)
56
57 static int      text_cmp(text *arg1, text *arg2);
58 static int32 text_length(Datum str);
59 static int32 text_position(text *t1, text *t2, int matchnum);
60 static text *text_substring(Datum str,
61                            int32 start,
62                            int32 length,
63                            bool length_not_specified);
64
65 static void appendStringInfoText(StringInfo str, const text *t);
66
67
68 /*****************************************************************************
69  *       USER I/O ROUTINES                                                                                                               *
70  *****************************************************************************/
71
72
/* VAL: digit character -> numeric value; DIG: numeric value -> digit character */
#define VAL(ch)			((ch) - '0')
#define DIG(num)		((num) + '0')
75
76 /*
77  *              byteain                 - converts from printable representation of byte array
78  *
79  *              Non-printable characters must be passed as '\nnn' (octal) and are
80  *              converted to internal form.  '\' must be passed as '\\'.
81  *              ereport(ERROR, ...) if bad form.
82  *
83  *              BUGS:
84  *                              The input is scaned twice.
85  *                              The error checking of input is minimal.
86  */
87 Datum
88 byteain(PG_FUNCTION_ARGS)
89 {
90         char       *inputText = PG_GETARG_CSTRING(0);
91         char       *tp;
92         char       *rp;
93         int                     byte;
94         bytea      *result;
95
96         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
97         {
98                 if (tp[0] != '\\')
99                         tp++;
100                 else if ((tp[0] == '\\') &&
101                                  (tp[1] >= '0' && tp[1] <= '3') &&
102                                  (tp[2] >= '0' && tp[2] <= '7') &&
103                                  (tp[3] >= '0' && tp[3] <= '7'))
104                         tp += 4;
105                 else if ((tp[0] == '\\') &&
106                                  (tp[1] == '\\'))
107                         tp += 2;
108                 else
109                 {
110                         /*
111                          * one backslash, not followed by 0 or ### valid octal
112                          */
113                         ereport(ERROR,
114                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
115                                          errmsg("invalid input syntax for type bytea")));
116                 }
117         }
118
119         byte += VARHDRSZ;
120         result = (bytea *) palloc(byte);
121         VARATT_SIZEP(result) = byte;    /* set varlena length */
122
123         tp = inputText;
124         rp = VARDATA(result);
125         while (*tp != '\0')
126         {
127                 if (tp[0] != '\\')
128                         *rp++ = *tp++;
129                 else if ((tp[0] == '\\') &&
130                                  (tp[1] >= '0' && tp[1] <= '3') &&
131                                  (tp[2] >= '0' && tp[2] <= '7') &&
132                                  (tp[3] >= '0' && tp[3] <= '7'))
133                 {
134                         byte = VAL(tp[1]);
135                         byte <<= 3;
136                         byte += VAL(tp[2]);
137                         byte <<= 3;
138                         *rp++ = byte + VAL(tp[3]);
139                         tp += 4;
140                 }
141                 else if ((tp[0] == '\\') &&
142                                  (tp[1] == '\\'))
143                 {
144                         *rp++ = '\\';
145                         tp += 2;
146                 }
147                 else
148                 {
149                         /*
150                          * We should never get here. The first pass should not allow
151                          * it.
152                          */
153                         ereport(ERROR,
154                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
155                                          errmsg("invalid input syntax for type bytea")));
156                 }
157         }
158
159         PG_RETURN_BYTEA_P(result);
160 }
161
162 /*
163  *              byteaout                - converts to printable representation of byte array
164  *
165  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
166  *              '\\'.
167  *
168  *              NULL vlena should be an error--returning string with NULL for now.
169  */
170 Datum
171 byteaout(PG_FUNCTION_ARGS)
172 {
173         bytea      *vlena = PG_GETARG_BYTEA_P(0);
174         char       *result;
175         char       *vp;
176         char       *rp;
177         int                     val;                    /* holds unprintable chars */
178         int                     i;
179         int                     len;
180
181         len = 1;                                        /* empty string has 1 char */
182         vp = VARDATA(vlena);
183         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
184         {
185                 if (*vp == '\\')
186                         len += 2;
187                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
188                         len += 4;
189                 else
190                         len++;
191         }
192         rp = result = (char *) palloc(len);
193         vp = VARDATA(vlena);
194         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
195         {
196                 if (*vp == '\\')
197                 {
198                         *rp++ = '\\';
199                         *rp++ = '\\';
200                 }
201                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
202                 {
203                         val = *vp;
204                         rp[0] = '\\';
205                         rp[3] = DIG(val & 07);
206                         val >>= 3;
207                         rp[2] = DIG(val & 07);
208                         val >>= 3;
209                         rp[1] = DIG(val & 03);
210                         rp += 4;
211                 }
212                 else
213                         *rp++ = *vp;
214         }
215         *rp = '\0';
216         PG_RETURN_CSTRING(result);
217 }
218
219 /*
220  *              bytearecv                       - converts external binary format to bytea
221  */
222 Datum
223 bytearecv(PG_FUNCTION_ARGS)
224 {
225         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
226         bytea      *result;
227         int                     nbytes;
228
229         nbytes = buf->len - buf->cursor;
230         result = (bytea *) palloc(nbytes + VARHDRSZ);
231         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
232         pq_copymsgbytes(buf, VARDATA(result), nbytes);
233         PG_RETURN_BYTEA_P(result);
234 }
235
236 /*
237  *              byteasend                       - converts bytea to binary format
238  *
239  * This is a special case: just copy the input...
240  */
241 Datum
242 byteasend(PG_FUNCTION_ARGS)
243 {
244         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
245
246         PG_RETURN_BYTEA_P(vlena);
247 }
248
249
250 /*
251  *              textin                  - converts "..." to internal representation
252  */
253 Datum
254 textin(PG_FUNCTION_ARGS)
255 {
256         char       *inputText = PG_GETARG_CSTRING(0);
257         text       *result;
258         int                     len;
259
260         /* verify encoding */
261         len = strlen(inputText);
262         pg_verifymbstr(inputText, len, false);
263
264         result = (text *) palloc(len + VARHDRSZ);
265         VARATT_SIZEP(result) = len + VARHDRSZ;
266
267         memcpy(VARDATA(result), inputText, len);
268
269         PG_RETURN_TEXT_P(result);
270 }
271
272 /*
273  *              textout                 - converts internal representation to "..."
274  */
275 Datum
276 textout(PG_FUNCTION_ARGS)
277 {
278         text       *t = PG_GETARG_TEXT_P(0);
279         int                     len;
280         char       *result;
281
282         len = VARSIZE(t) - VARHDRSZ;
283         result = (char *) palloc(len + 1);
284         memcpy(result, VARDATA(t), len);
285         result[len] = '\0';
286
287         PG_RETURN_CSTRING(result);
288 }
289
290 /*
291  *              textrecv                        - converts external binary format to text
292  */
293 Datum
294 textrecv(PG_FUNCTION_ARGS)
295 {
296         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
297         text       *result;
298         char       *str;
299         int                     nbytes;
300
301         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
302
303         /* verify encoding */
304         pg_verifymbstr(str, nbytes, false);
305
306         result = (text *) palloc(nbytes + VARHDRSZ);
307         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
308         memcpy(VARDATA(result), str, nbytes);
309         pfree(str);
310         PG_RETURN_TEXT_P(result);
311 }
312
313 /*
314  *              textsend                        - converts text to binary format
315  */
316 Datum
317 textsend(PG_FUNCTION_ARGS)
318 {
319         text       *t = PG_GETARG_TEXT_P(0);
320         StringInfoData buf;
321
322         pq_begintypsend(&buf);
323         pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
324         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
325 }
326
327
328 /*
329  *              unknownin                       - converts "..." to internal representation
330  */
331 Datum
332 unknownin(PG_FUNCTION_ARGS)
333 {
334         char       *str = PG_GETARG_CSTRING(0);
335
336         /* representation is same as cstring */
337         PG_RETURN_CSTRING(pstrdup(str));
338 }
339
340 /*
341  *              unknownout                      - converts internal representation to "..."
342  */
343 Datum
344 unknownout(PG_FUNCTION_ARGS)
345 {
346         /* representation is same as cstring */
347         char       *str = PG_GETARG_CSTRING(0);
348
349         PG_RETURN_CSTRING(pstrdup(str));
350 }
351
352 /*
353  *              unknownrecv                     - converts external binary format to unknown
354  */
355 Datum
356 unknownrecv(PG_FUNCTION_ARGS)
357 {
358         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
359         char       *str;
360         int                     nbytes;
361
362         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
363         /* representation is same as cstring */
364         PG_RETURN_CSTRING(str);
365 }
366
367 /*
368  *              unknownsend                     - converts unknown to binary format
369  */
370 Datum
371 unknownsend(PG_FUNCTION_ARGS)
372 {
373         /* representation is same as cstring */
374         char       *str = PG_GETARG_CSTRING(0);
375         StringInfoData buf;
376
377         pq_begintypsend(&buf);
378         pq_sendtext(&buf, str, strlen(str));
379         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
380 }
381
382
383 /* ========== PUBLIC ROUTINES ========== */
384
385 /*
386  * textlen -
387  *        returns the logical length of a text*
388  *         (which is less than the VARSIZE of the text*)
389  */
390 Datum
391 textlen(PG_FUNCTION_ARGS)
392 {
393         Datum           str = PG_GETARG_DATUM(0);
394
395         /* try to avoid decompressing argument */
396         PG_RETURN_INT32(text_length(str));
397 }
398
399 /*
400  * text_length -
401  *      Does the real work for textlen()
402  *
403  *      This is broken out so it can be called directly by other string processing
404  *      functions.      Note that the argument is passed as a Datum, to indicate that
405  *      it may still be in compressed form.  We can avoid decompressing it at all
406  *      in some cases.
407  */
408 static int32
409 text_length(Datum str)
410 {
411         /* fastpath when max encoding length is one */
412         if (pg_database_encoding_max_length() == 1)
413                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
414         else
415         {
416                 text       *t = DatumGetTextP(str);
417
418                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
419                                                                                          VARSIZE(t) - VARHDRSZ));
420         }
421 }
422
423 /*
424  * textoctetlen -
425  *        returns the physical length of a text*
426  *         (which is less than the VARSIZE of the text*)
427  */
428 Datum
429 textoctetlen(PG_FUNCTION_ARGS)
430 {
431         Datum           str = PG_GETARG_DATUM(0);
432
433         /* We need not detoast the input at all */
434         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
435 }
436
437 /*
438  * textcat -
439  *        takes two text* and returns a text* that is the concatenation of
440  *        the two.
441  *
442  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
443  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
444  * Allocate space for output in all cases.
445  * XXX - thomas 1997-07-10
446  */
447 Datum
448 textcat(PG_FUNCTION_ARGS)
449 {
450         text       *t1 = PG_GETARG_TEXT_P(0);
451         text       *t2 = PG_GETARG_TEXT_P(1);
452         int                     len1,
453                                 len2,
454                                 len;
455         text       *result;
456         char       *ptr;
457
458         len1 = VARSIZE(t1) - VARHDRSZ;
459         if (len1 < 0)
460                 len1 = 0;
461
462         len2 = VARSIZE(t2) - VARHDRSZ;
463         if (len2 < 0)
464                 len2 = 0;
465
466         len = len1 + len2 + VARHDRSZ;
467         result = (text *) palloc(len);
468
469         /* Set size of result string... */
470         VARATT_SIZEP(result) = len;
471
472         /* Fill data field of result string... */
473         ptr = VARDATA(result);
474         if (len1 > 0)
475                 memcpy(ptr, VARDATA(t1), len1);
476         if (len2 > 0)
477                 memcpy(ptr + len1, VARDATA(t2), len2);
478
479         PG_RETURN_TEXT_P(result);
480 }
481
482 /*
483  * text_substr()
484  * Return a substring starting at the specified position.
485  * - thomas 1997-12-31
486  *
487  * Input:
488  *      - string
489  *      - starting position (is one-based)
490  *      - string length
491  *
492  * If the starting position is zero or less, then return from the start of the string
493  *      adjusting the length to be consistent with the "negative start" per SQL92.
494  * If the length is less than zero, return the remaining string.
495  *
496  * Added multibyte support.
497  * - Tatsuo Ishii 1998-4-21
498  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
499  * Formerly returned the entire string; now returns a portion.
500  * - Thomas Lockhart 1998-12-10
501  * Now uses faster TOAST-slicing interface
502  * - John Gray 2002-02-22
503  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
504  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
505  * error; if E < 1, return '', not entire string). Fixed MB related bug when
506  * S > LC and < LC + 4 sometimes garbage characters are returned.
507  * - Joe Conway 2002-08-10
508  */
509 Datum
510 text_substr(PG_FUNCTION_ARGS)
511 {
512         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
513                                                                         PG_GETARG_INT32(1),
514                                                                         PG_GETARG_INT32(2),
515                                                                         false));
516 }
517
518 /*
519  * text_substr_no_len -
520  *        Wrapper to avoid opr_sanity failure due to
521  *        one function accepting a different number of args.
522  */
523 Datum
524 text_substr_no_len(PG_FUNCTION_ARGS)
525 {
526         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
527                                                                         PG_GETARG_INT32(1),
528                                                                         -1, true));
529 }
530
531 /*
532  * text_substring -
533  *      Does the real work for text_substr() and text_substr_no_len()
534  *
535  *      This is broken out so it can be called directly by other string processing
536  *      functions.      Note that the argument is passed as a Datum, to indicate that
537  *      it may still be in compressed/toasted form.  We can avoid detoasting all
538  *      of it in some cases.
539  */
540 static text *
541 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
542 {
543         int32           eml = pg_database_encoding_max_length();
544         int32           S = start;              /* start position */
545         int32           S1;                             /* adjusted start position */
546         int32           L1;                             /* adjusted substring length */
547
548         /* life is easy if the encoding max length is 1 */
549         if (eml == 1)
550         {
551                 S1 = Max(S, 1);
552
553                 if (length_not_specified)               /* special case - get length to
554                                                                                  * end of string */
555                         L1 = -1;
556                 else
557                 {
558                         /* end position */
559                         int                     E = S + length;
560
561                         /*
562                          * A negative value for L is the only way for the end position
563                          * to be before the start. SQL99 says to throw an error.
564                          */
565                         if (E < S)
566                                 ereport(ERROR,
567                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
568                                            errmsg("negative substring length not allowed")));
569
570                         /*
571                          * A zero or negative value for the end position can happen if
572                          * the start was negative or one. SQL99 says to return a
573                          * zero-length string.
574                          */
575                         if (E < 1)
576                                 return PG_STR_GET_TEXT("");
577
578                         L1 = E - S1;
579                 }
580
581                 /*
582                  * If the start position is past the end of the string, SQL99 says
583                  * to return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will
584                  * do that for us. Convert to zero-based starting position
585                  */
586                 return DatumGetTextPSlice(str, S1 - 1, L1);
587         }
588         else if (eml > 1)
589         {
590                 /*
591                  * When encoding max length is > 1, we can't get LC without
592                  * detoasting, so we'll grab a conservatively large slice now and
593                  * go back later to do the right thing
594                  */
595                 int32           slice_start;
596                 int32           slice_size;
597                 int32           slice_strlen;
598                 text       *slice;
599                 int32           E1;
600                 int32           i;
601                 char       *p;
602                 char       *s;
603                 text       *ret;
604
605                 /*
606                  * if S is past the end of the string, the tuple toaster will
607                  * return a zero-length string to us
608                  */
609                 S1 = Max(S, 1);
610
611                 /*
612                  * We need to start at position zero because there is no way to
613                  * know in advance which byte offset corresponds to the supplied
614                  * start position.
615                  */
616                 slice_start = 0;
617
618                 if (length_not_specified)               /* special case - get length to
619                                                                                  * end of string */
620                         slice_size = L1 = -1;
621                 else
622                 {
623                         int                     E = S + length;
624
625                         /*
626                          * A negative value for L is the only way for the end position
627                          * to be before the start. SQL99 says to throw an error.
628                          */
629                         if (E < S)
630                                 ereport(ERROR,
631                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
632                                            errmsg("negative substring length not allowed")));
633
634                         /*
635                          * A zero or negative value for the end position can happen if
636                          * the start was negative or one. SQL99 says to return a
637                          * zero-length string.
638                          */
639                         if (E < 1)
640                                 return PG_STR_GET_TEXT("");
641
642                         /*
643                          * if E is past the end of the string, the tuple toaster will
644                          * truncate the length for us
645                          */
646                         L1 = E - S1;
647
648                         /*
649                          * Total slice size in bytes can't be any longer than the
650                          * start position plus substring length times the encoding max
651                          * length.
652                          */
653                         slice_size = (S1 + L1) * eml;
654                 }
655                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
656
657                 /* see if we got back an empty string */
658                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
659                         return PG_STR_GET_TEXT("");
660
661                 /* Now we can get the actual length of the slice in MB characters */
662                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
663
664                 /*
665                  * Check that the start position wasn't > slice_strlen. If so,
666                  * SQL99 says to return a zero-length string.
667                  */
668                 if (S1 > slice_strlen)
669                         return PG_STR_GET_TEXT("");
670
671                 /*
672                  * Adjust L1 and E1 now that we know the slice string length.
673                  * Again remember that S1 is one based, and slice_start is zero
674                  * based.
675                  */
676                 if (L1 > -1)
677                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
678                 else
679                         E1 = slice_start + 1 + slice_strlen;
680
681                 /*
682                  * Find the start position in the slice; remember S1 is not zero
683                  * based
684                  */
685                 p = VARDATA(slice);
686                 for (i = 0; i < S1 - 1; i++)
687                         p += pg_mblen(p);
688
689                 /* hang onto a pointer to our start position */
690                 s = p;
691
692                 /*
693                  * Count the actual bytes used by the substring of the requested
694                  * length.
695                  */
696                 for (i = S1; i < E1; i++)
697                         p += pg_mblen(p);
698
699                 ret = (text *) palloc(VARHDRSZ + (p - s));
700                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
701                 memcpy(VARDATA(ret), s, (p - s));
702
703                 return ret;
704         }
705         else
706                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
707
708         /* not reached: suppress compiler warning */
709         return NULL;
710 }
711
712 /*
713  * textpos -
714  *        Return the position of the specified substring.
715  *        Implements the SQL92 POSITION() function.
716  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
717  * - thomas 1997-07-27
718  */
719 Datum
720 textpos(PG_FUNCTION_ARGS)
721 {
722         text       *str = PG_GETARG_TEXT_P(0);
723         text       *search_str = PG_GETARG_TEXT_P(1);
724
725         PG_RETURN_INT32(text_position(str, search_str, 1));
726 }
727
728 /*
729  * text_position -
730  *      Does the real work for textpos()
731  *
732  * Inputs:
733  *              t1 - string to be searched
734  *              t2 - pattern to match within t1
735  *              matchnum - number of the match to be found (1 is the first match)
736  * Result:
737  *              Character index of the first matched char, starting from 1,
738  *              or 0 if no match.
739  *
740  *      This is broken out so it can be called directly by other string processing
741  *      functions.
742  */
743 static int32
744 text_position(text *t1, text *t2, int matchnum)
745 {
746         int                     match = 0,
747                                 pos = 0,
748                                 p,
749                                 px,
750                                 len1,
751                                 len2;
752
753         if (matchnum <= 0)
754                 return 0;                               /* result for 0th match */
755
756         if (VARSIZE(t2) <= VARHDRSZ)
757                 return 1;                               /* result for empty pattern */
758
759         len1 = VARSIZE(t1) - VARHDRSZ;
760         len2 = VARSIZE(t2) - VARHDRSZ;
761
762         if (pg_database_encoding_max_length() == 1)
763         {
764                 /* simple case - single byte encoding */
765                 char       *p1,
766                                    *p2;
767
768                 p1 = VARDATA(t1);
769                 p2 = VARDATA(t2);
770
771                 /* no use in searching str past point where search_str will fit */
772                 px = (len1 - len2);
773
774                 for (p = 0; p <= px; p++)
775                 {
776                         if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
777                         {
778                                 if (++match == matchnum)
779                                 {
780                                         pos = p + 1;
781                                         break;
782                                 }
783                         }
784                         p1++;
785                 }
786         }
787         else
788         {
789                 /* not as simple - multibyte encoding */
790                 pg_wchar   *p1,
791                                    *p2,
792                                    *ps1,
793                                    *ps2;
794
795                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
796                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
797                 len1 = pg_wchar_strlen(p1);
798                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
799                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
800                 len2 = pg_wchar_strlen(p2);
801
802                 /* no use in searching str past point where search_str will fit */
803                 px = (len1 - len2);
804
805                 for (p = 0; p <= px; p++)
806                 {
807                         if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
808                         {
809                                 if (++match == matchnum)
810                                 {
811                                         pos = p + 1;
812                                         break;
813                                 }
814                         }
815                         p1++;
816                 }
817
818                 pfree(ps1);
819                 pfree(ps2);
820         }
821
822         return pos;
823 }
824
/* varstr_cmp()
 * Comparison function for text strings with given lengths.
 *
 * arg1/len1 and arg2/len2 describe two byte strings that need not be
 * null-terminated.
 *
 * When LC_COLLATE is C the strings are compared bytewise; otherwise they
 * are copied into null-terminated scratch buffers and handed to strcoll().
 *
 * Returns a negative value, zero, or a positive value when arg1 sorts
 * before, equal to, or after arg2.  The magnitude is not normalized:
 * callers must test only the sign.
 */
int
varstr_cmp(char *arg1, int len1, char *arg2, int len2)
{
	int			result;

	/*
	 * Unfortunately, there is no strncoll(), so in the non-C locale case
	 * we have to do some memory copying.  This turns out to be
	 * significantly slower, so we optimize the case where LC_COLLATE is
	 * C.  We also try to optimize relatively-short strings by avoiding
	 * palloc/pfree overhead.
	 */
#define STACKBUFLEN		1024

	if (lc_collate_is_c())
	{
		result = strncmp(arg1, arg2, Min(len1, len2));
		/* equal common prefix: the shorter string sorts first */
		if ((result == 0) && (len1 != len2))
			result = (len1 < len2) ? -1 : 1;
	}
	else
	{
		char		a1buf[STACKBUFLEN];
		char		a2buf[STACKBUFLEN];
		char	   *a1p,
				   *a2p;

		/* use the stack buffers unless the string (plus null) won't fit */
		if (len1 >= STACKBUFLEN)
			a1p = (char *) palloc(len1 + 1);
		else
			a1p = a1buf;
		if (len2 >= STACKBUFLEN)
			a2p = (char *) palloc(len2 + 1);
		else
			a2p = a2buf;

		memcpy(a1p, arg1, len1);
		a1p[len1] = '\0';
		memcpy(a2p, arg2, len2);
		a2p[len2] = '\0';

		result = strcoll(a1p, a2p);

		/*
		 * In some locales strcoll() can claim that nonidentical strings
		 * are equal.  Believing that would contradict the equality
		 * operators in this file, which treat values of different length
		 * as unequal without comparing them; so break such ties with a
		 * plain bytewise comparison.
		 */
		if (result == 0)
			result = strcmp(a1p, a2p);

		if (a1p != a1buf)
			pfree(a1p);
		if (a2p != a2buf)
			pfree(a2p);
	}

	return result;
}
882
883
884 /* text_cmp()
885  * Internal comparison function for text strings.
886  * Returns -1, 0 or 1
887  */
888 static int
889 text_cmp(text *arg1, text *arg2)
890 {
891         char       *a1p,
892                            *a2p;
893         int                     len1,
894                                 len2;
895
896         a1p = VARDATA(arg1);
897         a2p = VARDATA(arg2);
898
899         len1 = VARSIZE(arg1) - VARHDRSZ;
900         len2 = VARSIZE(arg2) - VARHDRSZ;
901
902         return varstr_cmp(a1p, len1, a2p, len2);
903 }
904
905 /*
906  * Comparison functions for text strings.
907  *
908  * Note: btree indexes need these routines not to leak memory; therefore,
909  * be careful to free working copies of toasted datums.  Most places don't
910  * need to be so careful.
911  */
912
913 Datum
914 texteq(PG_FUNCTION_ARGS)
915 {
916         text       *arg1 = PG_GETARG_TEXT_P(0);
917         text       *arg2 = PG_GETARG_TEXT_P(1);
918         bool            result;
919
920         /* fast path for different-length inputs */
921         if (VARSIZE(arg1) != VARSIZE(arg2))
922                 result = false;
923         else
924                 result = (text_cmp(arg1, arg2) == 0);
925
926         PG_FREE_IF_COPY(arg1, 0);
927         PG_FREE_IF_COPY(arg2, 1);
928
929         PG_RETURN_BOOL(result);
930 }
931
932 Datum
933 textne(PG_FUNCTION_ARGS)
934 {
935         text       *arg1 = PG_GETARG_TEXT_P(0);
936         text       *arg2 = PG_GETARG_TEXT_P(1);
937         bool            result;
938
939         /* fast path for different-length inputs */
940         if (VARSIZE(arg1) != VARSIZE(arg2))
941                 result = true;
942         else
943                 result = (text_cmp(arg1, arg2) != 0);
944
945         PG_FREE_IF_COPY(arg1, 0);
946         PG_FREE_IF_COPY(arg2, 1);
947
948         PG_RETURN_BOOL(result);
949 }
950
951 Datum
952 text_lt(PG_FUNCTION_ARGS)
953 {
954         text       *arg1 = PG_GETARG_TEXT_P(0);
955         text       *arg2 = PG_GETARG_TEXT_P(1);
956         bool            result;
957
958         result = (text_cmp(arg1, arg2) < 0);
959
960         PG_FREE_IF_COPY(arg1, 0);
961         PG_FREE_IF_COPY(arg2, 1);
962
963         PG_RETURN_BOOL(result);
964 }
965
966 Datum
967 text_le(PG_FUNCTION_ARGS)
968 {
969         text       *arg1 = PG_GETARG_TEXT_P(0);
970         text       *arg2 = PG_GETARG_TEXT_P(1);
971         bool            result;
972
973         result = (text_cmp(arg1, arg2) <= 0);
974
975         PG_FREE_IF_COPY(arg1, 0);
976         PG_FREE_IF_COPY(arg2, 1);
977
978         PG_RETURN_BOOL(result);
979 }
980
981 Datum
982 text_gt(PG_FUNCTION_ARGS)
983 {
984         text       *arg1 = PG_GETARG_TEXT_P(0);
985         text       *arg2 = PG_GETARG_TEXT_P(1);
986         bool            result;
987
988         result = (text_cmp(arg1, arg2) > 0);
989
990         PG_FREE_IF_COPY(arg1, 0);
991         PG_FREE_IF_COPY(arg2, 1);
992
993         PG_RETURN_BOOL(result);
994 }
995
996 Datum
997 text_ge(PG_FUNCTION_ARGS)
998 {
999         text       *arg1 = PG_GETARG_TEXT_P(0);
1000         text       *arg2 = PG_GETARG_TEXT_P(1);
1001         bool            result;
1002
1003         result = (text_cmp(arg1, arg2) >= 0);
1004
1005         PG_FREE_IF_COPY(arg1, 0);
1006         PG_FREE_IF_COPY(arg2, 1);
1007
1008         PG_RETURN_BOOL(result);
1009 }
1010
1011 Datum
1012 bttextcmp(PG_FUNCTION_ARGS)
1013 {
1014         text       *arg1 = PG_GETARG_TEXT_P(0);
1015         text       *arg2 = PG_GETARG_TEXT_P(1);
1016         int32           result;
1017
1018         result = text_cmp(arg1, arg2);
1019
1020         PG_FREE_IF_COPY(arg1, 0);
1021         PG_FREE_IF_COPY(arg2, 1);
1022
1023         PG_RETURN_INT32(result);
1024 }
1025
1026
1027 Datum
1028 text_larger(PG_FUNCTION_ARGS)
1029 {
1030         text       *arg1 = PG_GETARG_TEXT_P(0);
1031         text       *arg2 = PG_GETARG_TEXT_P(1);
1032         text       *result;
1033
1034         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1035
1036         PG_RETURN_TEXT_P(result);
1037 }
1038
1039 Datum
1040 text_smaller(PG_FUNCTION_ARGS)
1041 {
1042         text       *arg1 = PG_GETARG_TEXT_P(0);
1043         text       *arg2 = PG_GETARG_TEXT_P(1);
1044         text       *result;
1045
1046         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1047
1048         PG_RETURN_TEXT_P(result);
1049 }
1050
1051
1052 /*
1053  * The following operators support character-by-character comparison
1054  * of text data types, to allow building indexes suitable for LIKE
1055  * clauses.
1056  */
1057
1058 static int
1059 internal_text_pattern_compare(text *arg1, text *arg2)
1060 {
1061         int                     result;
1062
1063         result = memcmp(VARDATA(arg1), VARDATA(arg2),
1064                                         Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1065         if (result != 0)
1066                 return result;
1067         else if (VARSIZE(arg1) < VARSIZE(arg2))
1068                 return -1;
1069         else if (VARSIZE(arg1) > VARSIZE(arg2))
1070                 return 1;
1071         else
1072                 return 0;
1073 }
1074
1075
1076 Datum
1077 text_pattern_lt(PG_FUNCTION_ARGS)
1078 {
1079         text       *arg1 = PG_GETARG_TEXT_P(0);
1080         text       *arg2 = PG_GETARG_TEXT_P(1);
1081         int                     result;
1082
1083         result = internal_text_pattern_compare(arg1, arg2);
1084
1085         PG_FREE_IF_COPY(arg1, 0);
1086         PG_FREE_IF_COPY(arg2, 1);
1087
1088         PG_RETURN_BOOL(result < 0);
1089 }
1090
1091
1092 Datum
1093 text_pattern_le(PG_FUNCTION_ARGS)
1094 {
1095         text       *arg1 = PG_GETARG_TEXT_P(0);
1096         text       *arg2 = PG_GETARG_TEXT_P(1);
1097         int                     result;
1098
1099         result = internal_text_pattern_compare(arg1, arg2);
1100
1101         PG_FREE_IF_COPY(arg1, 0);
1102         PG_FREE_IF_COPY(arg2, 1);
1103
1104         PG_RETURN_BOOL(result <= 0);
1105 }
1106
1107
1108 Datum
1109 text_pattern_eq(PG_FUNCTION_ARGS)
1110 {
1111         text       *arg1 = PG_GETARG_TEXT_P(0);
1112         text       *arg2 = PG_GETARG_TEXT_P(1);
1113         int                     result;
1114
1115         if (VARSIZE(arg1) != VARSIZE(arg2))
1116                 result = 1;
1117         else
1118                 result = internal_text_pattern_compare(arg1, arg2);
1119
1120         PG_FREE_IF_COPY(arg1, 0);
1121         PG_FREE_IF_COPY(arg2, 1);
1122
1123         PG_RETURN_BOOL(result == 0);
1124 }
1125
1126
1127 Datum
1128 text_pattern_ge(PG_FUNCTION_ARGS)
1129 {
1130         text       *arg1 = PG_GETARG_TEXT_P(0);
1131         text       *arg2 = PG_GETARG_TEXT_P(1);
1132         int                     result;
1133
1134         result = internal_text_pattern_compare(arg1, arg2);
1135
1136         PG_FREE_IF_COPY(arg1, 0);
1137         PG_FREE_IF_COPY(arg2, 1);
1138
1139         PG_RETURN_BOOL(result >= 0);
1140 }
1141
1142
1143 Datum
1144 text_pattern_gt(PG_FUNCTION_ARGS)
1145 {
1146         text       *arg1 = PG_GETARG_TEXT_P(0);
1147         text       *arg2 = PG_GETARG_TEXT_P(1);
1148         int                     result;
1149
1150         result = internal_text_pattern_compare(arg1, arg2);
1151
1152         PG_FREE_IF_COPY(arg1, 0);
1153         PG_FREE_IF_COPY(arg2, 1);
1154
1155         PG_RETURN_BOOL(result > 0);
1156 }
1157
1158
1159 Datum
1160 text_pattern_ne(PG_FUNCTION_ARGS)
1161 {
1162         text       *arg1 = PG_GETARG_TEXT_P(0);
1163         text       *arg2 = PG_GETARG_TEXT_P(1);
1164         int                     result;
1165
1166         if (VARSIZE(arg1) != VARSIZE(arg2))
1167                 result = 1;
1168         else
1169                 result = internal_text_pattern_compare(arg1, arg2);
1170
1171         PG_FREE_IF_COPY(arg1, 0);
1172         PG_FREE_IF_COPY(arg2, 1);
1173
1174         PG_RETURN_BOOL(result != 0);
1175 }
1176
1177
1178 Datum
1179 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1180 {
1181         text       *arg1 = PG_GETARG_TEXT_P(0);
1182         text       *arg2 = PG_GETARG_TEXT_P(1);
1183         int                     result;
1184
1185         result = internal_text_pattern_compare(arg1, arg2);
1186
1187         PG_FREE_IF_COPY(arg1, 0);
1188         PG_FREE_IF_COPY(arg2, 1);
1189
1190         PG_RETURN_INT32(result);
1191 }
1192
1193
1194 /*-------------------------------------------------------------
1195  * byteaoctetlen
1196  *
1197  * get the number of bytes contained in an instance of type 'bytea'
1198  *-------------------------------------------------------------
1199  */
1200 Datum
1201 byteaoctetlen(PG_FUNCTION_ARGS)
1202 {
1203         Datum           str = PG_GETARG_DATUM(0);
1204
1205         /* We need not detoast the input at all */
1206         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1207 }
1208
1209 /*
1210  * byteacat -
1211  *        takes two bytea* and returns a bytea* that is the concatenation of
1212  *        the two.
1213  *
1214  * Cloned from textcat and modified as required.
1215  */
1216 Datum
1217 byteacat(PG_FUNCTION_ARGS)
1218 {
1219         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1220         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1221         int                     len1,
1222                                 len2,
1223                                 len;
1224         bytea      *result;
1225         char       *ptr;
1226
1227         len1 = VARSIZE(t1) - VARHDRSZ;
1228         if (len1 < 0)
1229                 len1 = 0;
1230
1231         len2 = VARSIZE(t2) - VARHDRSZ;
1232         if (len2 < 0)
1233                 len2 = 0;
1234
1235         len = len1 + len2 + VARHDRSZ;
1236         result = (bytea *) palloc(len);
1237
1238         /* Set size of result string... */
1239         VARATT_SIZEP(result) = len;
1240
1241         /* Fill data field of result string... */
1242         ptr = VARDATA(result);
1243         if (len1 > 0)
1244                 memcpy(ptr, VARDATA(t1), len1);
1245         if (len2 > 0)
1246                 memcpy(ptr + len1, VARDATA(t2), len2);
1247
1248         PG_RETURN_BYTEA_P(result);
1249 }
1250
1251 #define PG_STR_GET_BYTEA(str_) \
1252         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1253 /*
1254  * bytea_substr()
1255  * Return a substring starting at the specified position.
1256  * Cloned from text_substr and modified as required.
1257  *
1258  * Input:
1259  *      - string
1260  *      - starting position (is one-based)
1261  *      - string length (optional)
1262  *
1263  * If the starting position is zero or less, then return from the start of the string
1264  * adjusting the length to be consistent with the "negative start" per SQL92.
1265  * If the length is less than zero, an ERROR is thrown. If no third argument
1266  * (length) is provided, the length to the end of the string is assumed.
1267  */
1268 Datum
1269 bytea_substr(PG_FUNCTION_ARGS)
1270 {
1271         int                     S = PG_GETARG_INT32(1); /* start position */
1272         int                     S1;                             /* adjusted start position */
1273         int                     L1;                             /* adjusted substring length */
1274
1275         S1 = Max(S, 1);
1276
1277         if (fcinfo->nargs == 2)
1278         {
1279                 /*
1280                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1281                  * everything to the end of the string if we pass it a negative
1282                  * value for length.
1283                  */
1284                 L1 = -1;
1285         }
1286         else
1287         {
1288                 /* end position */
1289                 int                     E = S + PG_GETARG_INT32(2);
1290
1291                 /*
1292                  * A negative value for L is the only way for the end position to
1293                  * be before the start. SQL99 says to throw an error.
1294                  */
1295                 if (E < S)
1296                         ereport(ERROR,
1297                                         (errcode(ERRCODE_SUBSTRING_ERROR),
1298                                          errmsg("negative substring length not allowed")));
1299
1300                 /*
1301                  * A zero or negative value for the end position can happen if the
1302                  * start was negative or one. SQL99 says to return a zero-length
1303                  * string.
1304                  */
1305                 if (E < 1)
1306                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1307
1308                 L1 = E - S1;
1309         }
1310
1311         /*
1312          * If the start position is past the end of the string, SQL99 says to
1313          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
1314          * that for us. Convert to zero-based starting position
1315          */
1316         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1317 }
1318
1319 /*
1320  * bytea_substr_no_len -
1321  *        Wrapper to avoid opr_sanity failure due to
1322  *        one function accepting a different number of args.
1323  */
1324 Datum
1325 bytea_substr_no_len(PG_FUNCTION_ARGS)
1326 {
1327         return bytea_substr(fcinfo);
1328 }
1329
1330 /*
1331  * byteapos -
1332  *        Return the position of the specified substring.
1333  *        Implements the SQL92 POSITION() function.
1334  * Cloned from textpos and modified as required.
1335  */
1336 Datum
1337 byteapos(PG_FUNCTION_ARGS)
1338 {
1339         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1340         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1341         int                     pos;
1342         int                     px,
1343                                 p;
1344         int                     len1,
1345                                 len2;
1346         char       *p1,
1347                            *p2;
1348
1349         if (VARSIZE(t2) <= VARHDRSZ)
1350                 PG_RETURN_INT32(1);             /* result for empty pattern */
1351
1352         len1 = VARSIZE(t1) - VARHDRSZ;
1353         len2 = VARSIZE(t2) - VARHDRSZ;
1354
1355         p1 = VARDATA(t1);
1356         p2 = VARDATA(t2);
1357
1358         pos = 0;
1359         px = (len1 - len2);
1360         for (p = 0; p <= px; p++)
1361         {
1362                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1363                 {
1364                         pos = p + 1;
1365                         break;
1366                 };
1367                 p1++;
1368         };
1369
1370         PG_RETURN_INT32(pos);
1371 }
1372
1373 /*-------------------------------------------------------------
1374  * byteaGetByte
1375  *
1376  * this routine treats "bytea" as an array of bytes.
1377  * It returns the Nth byte (a number between 0 and 255).
1378  *-------------------------------------------------------------
1379  */
1380 Datum
1381 byteaGetByte(PG_FUNCTION_ARGS)
1382 {
1383         bytea      *v = PG_GETARG_BYTEA_P(0);
1384         int32           n = PG_GETARG_INT32(1);
1385         int                     len;
1386         int                     byte;
1387
1388         len = VARSIZE(v) - VARHDRSZ;
1389
1390         if (n < 0 || n >= len)
1391                 ereport(ERROR,
1392                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1393                                  errmsg("index %d out of valid range, 0..%d",
1394                                                 n, len - 1)));
1395
1396         byte = ((unsigned char *) VARDATA(v))[n];
1397
1398         PG_RETURN_INT32(byte);
1399 }
1400
1401 /*-------------------------------------------------------------
1402  * byteaGetBit
1403  *
1404  * This routine treats a "bytea" type like an array of bits.
1405  * It returns the value of the Nth bit (0 or 1).
1406  *
1407  *-------------------------------------------------------------
1408  */
1409 Datum
1410 byteaGetBit(PG_FUNCTION_ARGS)
1411 {
1412         bytea      *v = PG_GETARG_BYTEA_P(0);
1413         int32           n = PG_GETARG_INT32(1);
1414         int                     byteNo,
1415                                 bitNo;
1416         int                     len;
1417         int                     byte;
1418
1419         len = VARSIZE(v) - VARHDRSZ;
1420
1421         if (n < 0 || n >= len * 8)
1422                 ereport(ERROR,
1423                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1424                                  errmsg("index %d out of valid range, 0..%d",
1425                                                 n, len * 8 - 1)));
1426
1427         byteNo = n / 8;
1428         bitNo = n % 8;
1429
1430         byte = ((unsigned char *) VARDATA(v))[byteNo];
1431
1432         if (byte & (1 << bitNo))
1433                 PG_RETURN_INT32(1);
1434         else
1435                 PG_RETURN_INT32(0);
1436 }
1437
1438 /*-------------------------------------------------------------
1439  * byteaSetByte
1440  *
1441  * Given an instance of type 'bytea' creates a new one with
1442  * the Nth byte set to the given value.
1443  *
1444  *-------------------------------------------------------------
1445  */
1446 Datum
1447 byteaSetByte(PG_FUNCTION_ARGS)
1448 {
1449         bytea      *v = PG_GETARG_BYTEA_P(0);
1450         int32           n = PG_GETARG_INT32(1);
1451         int32           newByte = PG_GETARG_INT32(2);
1452         int                     len;
1453         bytea      *res;
1454
1455         len = VARSIZE(v) - VARHDRSZ;
1456
1457         if (n < 0 || n >= len)
1458                 ereport(ERROR,
1459                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1460                                  errmsg("index %d out of valid range, 0..%d",
1461                                                 n, len - 1)));
1462
1463         /*
1464          * Make a copy of the original varlena.
1465          */
1466         res = (bytea *) palloc(VARSIZE(v));
1467         memcpy((char *) res, (char *) v, VARSIZE(v));
1468
1469         /*
1470          * Now set the byte.
1471          */
1472         ((unsigned char *) VARDATA(res))[n] = newByte;
1473
1474         PG_RETURN_BYTEA_P(res);
1475 }
1476
1477 /*-------------------------------------------------------------
1478  * byteaSetBit
1479  *
1480  * Given an instance of type 'bytea' creates a new one with
1481  * the Nth bit set to the given value.
1482  *
1483  *-------------------------------------------------------------
1484  */
1485 Datum
1486 byteaSetBit(PG_FUNCTION_ARGS)
1487 {
1488         bytea      *v = PG_GETARG_BYTEA_P(0);
1489         int32           n = PG_GETARG_INT32(1);
1490         int32           newBit = PG_GETARG_INT32(2);
1491         bytea      *res;
1492         int                     len;
1493         int                     oldByte,
1494                                 newByte;
1495         int                     byteNo,
1496                                 bitNo;
1497
1498         len = VARSIZE(v) - VARHDRSZ;
1499
1500         if (n < 0 || n >= len * 8)
1501                 ereport(ERROR,
1502                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1503                                  errmsg("index %d out of valid range, 0..%d",
1504                                                 n, len * 8 - 1)));
1505
1506         byteNo = n / 8;
1507         bitNo = n % 8;
1508
1509         /*
1510          * sanity check!
1511          */
1512         if (newBit != 0 && newBit != 1)
1513                 ereport(ERROR,
1514                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1515                                  errmsg("new bit must be 0 or 1")));
1516
1517         /*
1518          * Make a copy of the original varlena.
1519          */
1520         res = (bytea *) palloc(VARSIZE(v));
1521         memcpy((char *) res, (char *) v, VARSIZE(v));
1522
1523         /*
1524          * Update the byte.
1525          */
1526         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1527
1528         if (newBit == 0)
1529                 newByte = oldByte & (~(1 << bitNo));
1530         else
1531                 newByte = oldByte | (1 << bitNo);
1532
1533         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1534
1535         PG_RETURN_BYTEA_P(res);
1536 }
1537
1538
1539 /* text_name()
1540  * Converts a text type to a Name type.
1541  */
1542 Datum
1543 text_name(PG_FUNCTION_ARGS)
1544 {
1545         text       *s = PG_GETARG_TEXT_P(0);
1546         Name            result;
1547         int                     len;
1548
1549         len = VARSIZE(s) - VARHDRSZ;
1550
1551         /* Truncate oversize input */
1552         if (len >= NAMEDATALEN)
1553                 len = NAMEDATALEN - 1;
1554
1555 #ifdef STRINGDEBUG
1556         printf("text- convert string length %d (%d) ->%d\n",
1557                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1558 #endif
1559
1560         result = (Name) palloc(NAMEDATALEN);
1561         memcpy(NameStr(*result), VARDATA(s), len);
1562
1563         /* now null pad to full length... */
1564         while (len < NAMEDATALEN)
1565         {
1566                 *(NameStr(*result) + len) = '\0';
1567                 len++;
1568         }
1569
1570         PG_RETURN_NAME(result);
1571 }
1572
1573 /* name_text()
1574  * Converts a Name type to a text type.
1575  */
1576 Datum
1577 name_text(PG_FUNCTION_ARGS)
1578 {
1579         Name            s = PG_GETARG_NAME(0);
1580         text       *result;
1581         int                     len;
1582
1583         len = strlen(NameStr(*s));
1584
1585 #ifdef STRINGDEBUG
1586         printf("text- convert string length %d (%d) ->%d\n",
1587                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1588 #endif
1589
1590         result = palloc(VARHDRSZ + len);
1591         VARATT_SIZEP(result) = VARHDRSZ + len;
1592         memcpy(VARDATA(result), NameStr(*s), len);
1593
1594         PG_RETURN_TEXT_P(result);
1595 }
1596
1597
1598 /*
1599  * textToQualifiedNameList - convert a text object to list of names
1600  *
1601  * This implements the input parsing needed by nextval() and other
1602  * functions that take a text parameter representing a qualified name.
1603  * We split the name at dots, downcase if not double-quoted, and
1604  * truncate names if they're too long.
1605  */
1606 List *
1607 textToQualifiedNameList(text *textval)
1608 {
1609         char       *rawname;
1610         List       *result = NIL;
1611         List       *namelist;
1612         ListCell   *l;
1613
1614         /* Convert to C string (handles possible detoasting). */
1615         /* Note we rely on being able to modify rawname below. */
1616         rawname = DatumGetCString(DirectFunctionCall1(textout,
1617                                                                                           PointerGetDatum(textval)));
1618
1619         if (!SplitIdentifierString(rawname, '.', &namelist))
1620                 ereport(ERROR,
1621                                 (errcode(ERRCODE_INVALID_NAME),
1622                                  errmsg("invalid name syntax")));
1623
1624         if (namelist == NIL)
1625                 ereport(ERROR,
1626                                 (errcode(ERRCODE_INVALID_NAME),
1627                                  errmsg("invalid name syntax")));
1628
1629         foreach(l, namelist)
1630         {
1631                 char       *curname = (char *) lfirst(l);
1632
1633                 result = lappend(result, makeString(pstrdup(curname)));
1634         }
1635
1636         pfree(rawname);
1637         list_free(namelist);
1638
1639         return result;
1640 }
1641
1642 /*
1643  * SplitIdentifierString --- parse a string containing identifiers
1644  *
1645  * This is the guts of textToQualifiedNameList, and is exported for use in
1646  * other situations such as parsing GUC variables.      In the GUC case, it's
1647  * important to avoid memory leaks, so the API is designed to minimize the
1648  * amount of stuff that needs to be allocated and freed.
1649  *
1650  * Inputs:
1651  *      rawstring: the input string; must be overwritable!      On return, it's
1652  *                         been modified to contain the separated identifiers.
1653  *      separator: the separator punctuation expected between identifiers
1654  *                         (typically '.' or ',').      Whitespace may also appear around
1655  *                         identifiers.
1656  * Outputs:
1657  *      namelist: filled with a palloc'd list of pointers to identifiers within
1658  *                        rawstring.  Caller should freeList() this even on error return.
1659  *
1660  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1661  *
1662  * Note that an empty string is considered okay here, though not in
1663  * textToQualifiedNameList.
1664  */
1665 bool
1666 SplitIdentifierString(char *rawstring, char separator,
1667                                           List **namelist)
1668 {
1669         char       *nextp = rawstring;
1670         bool            done = false;
1671
1672         *namelist = NIL;
1673
1674         while (isspace((unsigned char) *nextp))
1675                 nextp++;                                /* skip leading whitespace */
1676
1677         if (*nextp == '\0')
1678                 return true;                    /* allow empty string */
1679
1680         /* At the top of the loop, we are at start of a new identifier. */
1681         do
1682         {
1683                 char       *curname;
1684                 char       *endp;
1685
1686                 if (*nextp == '\"')
1687                 {
1688                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1689                         curname = nextp + 1;
1690                         for (;;)
1691                         {
1692                                 endp = strchr(nextp + 1, '\"');
1693                                 if (endp == NULL)
1694                                         return false;           /* mismatched quotes */
1695                                 if (endp[1] != '\"')
1696                                         break;          /* found end of quoted name */
1697                                 /* Collapse adjacent quotes into one quote, and look again */
1698                                 memmove(endp, endp + 1, strlen(endp));
1699                                 nextp = endp;
1700                         }
1701                         /* endp now points at the terminating quote */
1702                         nextp = endp + 1;
1703                 }
1704                 else
1705                 {
1706                         /* Unquoted name --- extends to separator or whitespace */
1707                         char       *downname;
1708                         int                     len;
1709
1710                         curname = nextp;
1711                         while (*nextp && *nextp != separator &&
1712                                    !isspace((unsigned char) *nextp))
1713                                 nextp++;
1714                         endp = nextp;
1715                         if (curname == nextp)
1716                                 return false;   /* empty unquoted name not allowed */
1717
1718                         /*
1719                          * Downcase the identifier, using same code as main lexer
1720                          * does.
1721                          *
1722                          * XXX because we want to overwrite the input in-place, we cannot
1723                          * support a downcasing transformation that increases the
1724                          * string length.  This is not a problem given the current
1725                          * implementation of downcase_truncate_identifier, but we'll
1726                          * probably have to do something about this someday.
1727                          */
1728                         len = endp - curname;
1729                         downname = downcase_truncate_identifier(curname, len, false);
1730                         Assert(strlen(downname) <= len);
1731                         strncpy(curname, downname, len);
1732                         pfree(downname);
1733                 }
1734
1735                 while (isspace((unsigned char) *nextp))
1736                         nextp++;                        /* skip trailing whitespace */
1737
1738                 if (*nextp == separator)
1739                 {
1740                         nextp++;
1741                         while (isspace((unsigned char) *nextp))
1742                                 nextp++;                /* skip leading whitespace for next */
1743                         /* we expect another name, so done remains false */
1744                 }
1745                 else if (*nextp == '\0')
1746                         done = true;
1747                 else
1748                         return false;           /* invalid syntax */
1749
1750                 /* Now safe to overwrite separator with a null */
1751                 *endp = '\0';
1752
1753                 /* Truncate name if it's overlength */
1754                 truncate_identifier(curname, strlen(curname), false);
1755
1756                 /*
1757                  * Finished isolating current name --- add it to list
1758                  */
1759                 *namelist = lappend(*namelist, curname);
1760
1761                 /* Loop back if we didn't reach end of string */
1762         } while (!done);
1763
1764         return true;
1765 }
1766
1767
1768 /*****************************************************************************
1769  *      Comparison Functions used for bytea
1770  *
1771  * Note: btree indexes need these routines not to leak memory; therefore,
1772  * be careful to free working copies of toasted datums.  Most places don't
1773  * need to be so careful.
1774  *****************************************************************************/
1775
1776 Datum
1777 byteaeq(PG_FUNCTION_ARGS)
1778 {
1779         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1780         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1781         int                     len1,
1782                                 len2;
1783         bool            result;
1784
1785         len1 = VARSIZE(arg1) - VARHDRSZ;
1786         len2 = VARSIZE(arg2) - VARHDRSZ;
1787
1788         /* fast path for different-length inputs */
1789         if (len1 != len2)
1790                 result = false;
1791         else
1792                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1793
1794         PG_FREE_IF_COPY(arg1, 0);
1795         PG_FREE_IF_COPY(arg2, 1);
1796
1797         PG_RETURN_BOOL(result);
1798 }
1799
1800 Datum
1801 byteane(PG_FUNCTION_ARGS)
1802 {
1803         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1804         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1805         int                     len1,
1806                                 len2;
1807         bool            result;
1808
1809         len1 = VARSIZE(arg1) - VARHDRSZ;
1810         len2 = VARSIZE(arg2) - VARHDRSZ;
1811
1812         /* fast path for different-length inputs */
1813         if (len1 != len2)
1814                 result = true;
1815         else
1816                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1817
1818         PG_FREE_IF_COPY(arg1, 0);
1819         PG_FREE_IF_COPY(arg2, 1);
1820
1821         PG_RETURN_BOOL(result);
1822 }
1823
1824 Datum
1825 bytealt(PG_FUNCTION_ARGS)
1826 {
1827         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1828         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1829         int                     len1,
1830                                 len2;
1831         int                     cmp;
1832
1833         len1 = VARSIZE(arg1) - VARHDRSZ;
1834         len2 = VARSIZE(arg2) - VARHDRSZ;
1835
1836         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1837
1838         PG_FREE_IF_COPY(arg1, 0);
1839         PG_FREE_IF_COPY(arg2, 1);
1840
1841         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1842 }
1843
1844 Datum
1845 byteale(PG_FUNCTION_ARGS)
1846 {
1847         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1848         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1849         int                     len1,
1850                                 len2;
1851         int                     cmp;
1852
1853         len1 = VARSIZE(arg1) - VARHDRSZ;
1854         len2 = VARSIZE(arg2) - VARHDRSZ;
1855
1856         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1857
1858         PG_FREE_IF_COPY(arg1, 0);
1859         PG_FREE_IF_COPY(arg2, 1);
1860
1861         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1862 }
1863
1864 Datum
1865 byteagt(PG_FUNCTION_ARGS)
1866 {
1867         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1868         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1869         int                     len1,
1870                                 len2;
1871         int                     cmp;
1872
1873         len1 = VARSIZE(arg1) - VARHDRSZ;
1874         len2 = VARSIZE(arg2) - VARHDRSZ;
1875
1876         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1877
1878         PG_FREE_IF_COPY(arg1, 0);
1879         PG_FREE_IF_COPY(arg2, 1);
1880
1881         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1882 }
1883
1884 Datum
1885 byteage(PG_FUNCTION_ARGS)
1886 {
1887         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1888         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1889         int                     len1,
1890                                 len2;
1891         int                     cmp;
1892
1893         len1 = VARSIZE(arg1) - VARHDRSZ;
1894         len2 = VARSIZE(arg2) - VARHDRSZ;
1895
1896         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1897
1898         PG_FREE_IF_COPY(arg1, 0);
1899         PG_FREE_IF_COPY(arg2, 1);
1900
1901         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1902 }
1903
1904 Datum
1905 byteacmp(PG_FUNCTION_ARGS)
1906 {
1907         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1908         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1909         int                     len1,
1910                                 len2;
1911         int                     cmp;
1912
1913         len1 = VARSIZE(arg1) - VARHDRSZ;
1914         len2 = VARSIZE(arg2) - VARHDRSZ;
1915
1916         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1917         if ((cmp == 0) && (len1 != len2))
1918                 cmp = (len1 < len2) ? -1 : 1;
1919
1920         PG_FREE_IF_COPY(arg1, 0);
1921         PG_FREE_IF_COPY(arg2, 1);
1922
1923         PG_RETURN_INT32(cmp);
1924 }
1925
1926 /*
1927  * appendStringInfoText
1928  *
1929  * Append a text to str.
1930  * Like appendStringInfoString(str, PG_TEXT_GET_STR(s)) but faster.
1931  */
1932 static void
1933 appendStringInfoText(StringInfo str, const text *t)
1934 {
1935         appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
1936 }
1937
1938 /*
1939  * replace_text
1940  * replace all occurrences of 'old_sub_str' in 'orig_str'
1941  * with 'new_sub_str' to form 'new_str'
1942  *
1943  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1944  * otherwise returns 'new_str'
1945  */
1946 Datum
1947 replace_text(PG_FUNCTION_ARGS)
1948 {
1949         text       *src_text = PG_GETARG_TEXT_P(0);
1950         text       *from_sub_text = PG_GETARG_TEXT_P(1);
1951         text       *to_sub_text = PG_GETARG_TEXT_P(2);
1952         int                     src_text_len = TEXTLEN(src_text);
1953         int                     from_sub_text_len = TEXTLEN(from_sub_text);
1954         text       *left_text;
1955         text       *right_text;
1956         text       *buf_text;
1957         text       *ret_text;
1958         int                     curr_posn;
1959         StringInfo      str;
1960
1961         if (src_text_len == 0 || from_sub_text_len == 0)
1962                 PG_RETURN_TEXT_P(src_text);
1963
1964         curr_posn = TEXTPOS(src_text, from_sub_text);
1965
1966         /* When the from_sub_text is not found, there is nothing to do. */
1967         if (curr_posn == 0)
1968                 PG_RETURN_TEXT_P(src_text);
1969
1970         str = makeStringInfo();
1971         buf_text = src_text;
1972
1973         while (curr_posn > 0)
1974         {
1975                 left_text = text_substring(PointerGetDatum(buf_text),
1976                                                                    1, curr_posn - 1, false);
1977                 right_text = text_substring(PointerGetDatum(buf_text),
1978                                                                         curr_posn + from_sub_text_len, -1, true);
1979
1980                 appendStringInfoText(str, left_text);
1981                 appendStringInfoText(str, to_sub_text);
1982
1983                 if (buf_text != src_text)
1984                         pfree(buf_text);
1985                 pfree(left_text);
1986                 buf_text = right_text;
1987                 curr_posn = TEXTPOS(buf_text, from_sub_text);
1988         }
1989
1990         appendStringInfoText(str, buf_text);
1991         if (buf_text != src_text)
1992                 pfree(buf_text);
1993
1994         ret_text = PG_STR_GET_TEXT(str->data);
1995         pfree(str->data);
1996         pfree(str);
1997
1998         PG_RETURN_TEXT_P(ret_text);
1999 }
2000
2001 /*
2002  * check_replace_text_has_escape_char
2003  * check whether replace_text has escape char. 
2004  */
2005 static bool
2006 check_replace_text_has_escape_char(const text *replace_text)
2007 {
2008         const char *p = VARDATA(replace_text);
2009         const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2010
2011         if (pg_database_encoding_max_length() == 1)
2012         {
2013                 for (; p < p_end; p++)
2014                         if (*p == '\\') return true;
2015         }
2016         else
2017         {
2018                 for (; p < p_end; p += pg_mblen(p))
2019                         if (*p == '\\') return true;
2020         }
2021
2022         return false;
2023 }
2024
2025 /*
2026  * appendStringInfoRegexpSubstr
2027  * append string by using back references of regexp.
2028  */
2029 static void
2030 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2031     regmatch_t *pmatch, text *src_text)
2032 {
2033         const char *p = VARDATA(replace_text);
2034         const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2035
2036         int                     eml = pg_database_encoding_max_length();
2037
2038         int                     substr_start = 1;
2039         int                     ch_cnt;
2040
2041         int                     so;
2042         int                     eo;
2043
2044         while (1)
2045         {
2046                 /* Find escape char. */
2047                 ch_cnt = 0;
2048                 if (eml == 1)
2049                 {
2050                         for (; p < p_end && *p != '\\'; p++)
2051                                 ch_cnt++;
2052                 }
2053                 else
2054                 {
2055                         for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2056                                 ch_cnt++;
2057                 }
2058
2059                 /*
2060                  * Copy the text when there is a text in the left of escape char
2061                  * or escape char is not found.
2062                  */
2063                 if (ch_cnt)
2064                 {
2065                         text *append_text = text_substring(PointerGetDatum(replace_text),
2066                                                                           substr_start, ch_cnt, false);
2067                         appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
2068                         pfree(append_text);
2069                 }
2070                 substr_start += ch_cnt + 1;
2071
2072                 if (p >= p_end) /* When escape char is not found. */
2073                         break;
2074
2075                 /* See the next character of escape char. */
2076                 p++;
2077                 so = eo = -1;
2078
2079                 if (*p >= '1' && *p <= '9')
2080                 {
2081                         /* Use the back reference of regexp. */
2082                         int             idx = *p - '0';
2083                         so = pmatch[idx].rm_so;
2084                         eo = pmatch[idx].rm_eo;
2085                         p++;
2086                         substr_start++;
2087                 }
2088                 else if (*p == '&')
2089                 {
2090                         /* Use the entire matched string. */
2091                         so = pmatch[0].rm_so;
2092                         eo = pmatch[0].rm_eo;
2093                         p++;
2094                         substr_start++;
2095                 }
2096
2097                 if (so != -1 && eo != -1)
2098                 {
2099                         /* Copy the text that is back reference of regexp. */
2100                         text *append_text = text_substring(PointerGetDatum(src_text),
2101                                                                           so + 1, (eo - so), false);
2102                         appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
2103                         pfree(append_text);
2104                 }
2105         }
2106 }
2107
2108 #define REGEXP_REPLACE_BACKREF_CNT              10
2109
2110 /*
2111  * replace_text_regexp
2112  * replace text that matches to regexp in src_text to replace_text.
2113  */
2114 Datum
2115 replace_text_regexp(PG_FUNCTION_ARGS)
2116 {
2117         text       *ret_text;
2118         text       *src_text = PG_GETARG_TEXT_P(0);
2119         int                     src_text_len = VARSIZE(src_text) - VARHDRSZ;
2120         regex_t    *re = (regex_t *)PG_GETARG_POINTER(1);
2121         text       *replace_text = PG_GETARG_TEXT_P(2);
2122         bool            global = PG_GETARG_BOOL(3);
2123         StringInfo      str = makeStringInfo();
2124         int                     regexec_result;
2125         regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
2126         pg_wchar   *data;
2127         size_t          data_len;
2128         int                     search_start;
2129         int                     data_pos;
2130         bool            have_escape;
2131
2132         /* Convert data string to wide characters. */
2133         data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2134         data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
2135
2136         /* Check whether replace_text has escape char. */
2137         have_escape = check_replace_text_has_escape_char(replace_text);
2138
2139         for (search_start = data_pos = 0; search_start <= data_len;)
2140         {
2141                 regexec_result = pg_regexec(re,
2142                                                                         data,
2143                                                                         data_len,
2144                                                                         search_start,
2145                                                                         NULL,   /* no details */
2146                                                                         REGEXP_REPLACE_BACKREF_CNT,
2147                                                                         pmatch,
2148                                                                         0);
2149
2150                 if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
2151                 {
2152                         char    errMsg[100];
2153
2154                         /* re failed??? */
2155                         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2156                         ereport(ERROR,
2157                                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2158                                  errmsg("regular expression failed: %s", errMsg)));
2159                 }
2160
2161                 if (regexec_result == REG_NOMATCH)
2162                         break;
2163
2164         /*
2165          * Copy the text when there is a text in the left of matched position.
2166          */
2167                 if (pmatch[0].rm_so - data_pos > 0)
2168                 {
2169                         text *left_text = text_substring(PointerGetDatum(src_text),
2170                                                                            data_pos + 1,
2171                                                                            pmatch[0].rm_so - data_pos, false);
2172                         appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
2173                         pfree(left_text);
2174                 }
2175
2176                 /*
2177                  * Copy the replace_text. Process back references when the
2178                  * replace_text has escape characters. 
2179                  */
2180                 if (have_escape)
2181                         appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
2182                 else
2183                         appendStringInfoString(str, PG_TEXT_GET_STR(replace_text));
2184
2185                 search_start = data_pos = pmatch[0].rm_eo;
2186
2187                 /*
2188                  * When global option is off, replace the first instance only.
2189                  */
2190                 if (!global)
2191                         break;
2192
2193                 /*
2194                  * Search from next character when the matching text is zero width.
2195                  */
2196                 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2197                         search_start++;
2198         }
2199
2200         /*
2201      * Copy the text when there is a text at the right of last matched
2202          * or regexp is not matched.
2203          */
2204         if (data_pos < data_len)
2205         {
2206                 text *right_text = text_substring(PointerGetDatum(src_text),
2207                                                                    data_pos + 1, -1, true);
2208                 appendStringInfoString(str, PG_TEXT_GET_STR(right_text));
2209                 pfree(right_text);
2210         }
2211
2212         ret_text = PG_STR_GET_TEXT(str->data);
2213         pfree(str->data);
2214         pfree(str);
2215         pfree(data);
2216
2217         PG_RETURN_TEXT_P(ret_text);
2218 }
2219
2220 /*
2221  * split_text
2222  * parse input string
2223  * return ord item (1 based)
2224  * based on provided field separator
2225  */
2226 Datum
2227 split_text(PG_FUNCTION_ARGS)
2228 {
2229         text       *inputstring = PG_GETARG_TEXT_P(0);
2230         text       *fldsep = PG_GETARG_TEXT_P(1);
2231         int                     fldnum = PG_GETARG_INT32(2);
2232         int                     inputstring_len = TEXTLEN(inputstring);
2233         int                     fldsep_len = TEXTLEN(fldsep);
2234         int                     start_posn;
2235         int                     end_posn;
2236         text       *result_text;
2237
2238         /* field number is 1 based */
2239         if (fldnum < 1)
2240                 ereport(ERROR,
2241                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2242                                  errmsg("field position must be greater than zero")));
2243
2244         /* return empty string for empty input string */
2245         if (inputstring_len < 1)
2246                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2247
2248         /* empty field separator */
2249         if (fldsep_len < 1)
2250         {
2251                 /* if first field, return input string, else empty string */
2252                 if (fldnum == 1)
2253                         PG_RETURN_TEXT_P(inputstring);
2254                 else
2255                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2256         }
2257
2258         start_posn = text_position(inputstring, fldsep, fldnum - 1);
2259         end_posn = text_position(inputstring, fldsep, fldnum);
2260
2261         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2262         {
2263                 /* if first field, return input string, else empty string */
2264                 if (fldnum == 1)
2265                         PG_RETURN_TEXT_P(inputstring);
2266                 else
2267                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2268         }
2269         else if (start_posn == 0)
2270         {
2271                 /* first field requested */
2272                 result_text = LEFT(inputstring, fldsep);
2273                 PG_RETURN_TEXT_P(result_text);
2274         }
2275         else if (end_posn == 0)
2276         {
2277                 /* last field requested */
2278                 result_text = text_substring(PointerGetDatum(inputstring),
2279                                                                          start_posn + fldsep_len,
2280                                                                          -1, true);
2281                 PG_RETURN_TEXT_P(result_text);
2282         }
2283         else
2284         {
2285                 /* interior field requested */
2286                 result_text = text_substring(PointerGetDatum(inputstring),
2287                                                                          start_posn + fldsep_len,
2288                                                                          end_posn - start_posn - fldsep_len,
2289                                                                          false);
2290                 PG_RETURN_TEXT_P(result_text);
2291         }
2292 }
2293
2294 /*
2295  * text_to_array
2296  * parse input string
2297  * return text array of elements
2298  * based on provided field separator
2299  */
2300 Datum
2301 text_to_array(PG_FUNCTION_ARGS)
2302 {
2303         text       *inputstring = PG_GETARG_TEXT_P(0);
2304         text       *fldsep = PG_GETARG_TEXT_P(1);
2305         int                     inputstring_len = TEXTLEN(inputstring);
2306         int                     fldsep_len = TEXTLEN(fldsep);
2307         int                     fldnum;
2308         int                     start_posn;
2309         int                     end_posn;
2310         text       *result_text;
2311         ArrayBuildState *astate = NULL;
2312
2313         /* return NULL for empty input string */
2314         if (inputstring_len < 1)
2315                 PG_RETURN_NULL();
2316
2317         /*
2318          * empty field separator return one element, 1D, array using the input
2319          * string
2320          */
2321         if (fldsep_len < 1)
2322                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2323                                                                            CStringGetDatum(inputstring), 1));
2324
2325         /* start with end position holding the initial start position */
2326         end_posn = 0;
2327         for (fldnum = 1;; fldnum++) /* field number is 1 based */
2328         {
2329                 Datum           dvalue;
2330                 bool            disnull = false;
2331
2332                 start_posn = end_posn;
2333                 end_posn = text_position(inputstring, fldsep, fldnum);
2334
2335                 if ((start_posn == 0) && (end_posn == 0))               /* fldsep not found */
2336                 {
2337                         if (fldnum == 1)
2338                         {
2339                                 /*
2340                                  * first element return one element, 1D, array using the
2341                                  * input string
2342                                  */
2343                                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2344                                                                            CStringGetDatum(inputstring), 1));
2345                         }
2346                         else
2347                         {
2348                                 /* otherwise create array and exit */
2349                                 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2350                                                                                                   CurrentMemoryContext));
2351                         }
2352                 }
2353                 else if (start_posn == 0)
2354                 {
2355                         /* first field requested */
2356                         result_text = LEFT(inputstring, fldsep);
2357                 }
2358                 else if (end_posn == 0)
2359                 {
2360                         /* last field requested */
2361                         result_text = text_substring(PointerGetDatum(inputstring),
2362                                                                                  start_posn + fldsep_len,
2363                                                                                  -1, true);
2364                 }
2365                 else
2366                 {
2367                         /* interior field requested */
2368                         result_text = text_substring(PointerGetDatum(inputstring),
2369                                                                                  start_posn + fldsep_len,
2370                                                                           end_posn - start_posn - fldsep_len,
2371                                                                                  false);
2372                 }
2373
2374                 /* stash away current value */
2375                 dvalue = PointerGetDatum(result_text);
2376                 astate = accumArrayResult(astate, dvalue,
2377                                                                   disnull, TEXTOID,
2378                                                                   CurrentMemoryContext);
2379         }
2380
2381         /* never reached -- keep compiler quiet */
2382         PG_RETURN_NULL();
2383 }
2384
2385 /*
2386  * array_to_text
2387  * concatenate Cstring representation of input array elements
2388  * using provided field separator
2389  */
2390 Datum
2391 array_to_text(PG_FUNCTION_ARGS)
2392 {
2393         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2394         char       *fldsep = PG_TEXTARG_GET_STR(1);
2395         int                     nitems,
2396                            *dims,
2397                                 ndims;
2398         char       *p;
2399         Oid                     element_type;
2400         int                     typlen;
2401         bool            typbyval;
2402         char            typalign;
2403         StringInfo      result_str = makeStringInfo();
2404         int                     i;
2405         ArrayMetaState *my_extra;
2406
2407         p = ARR_DATA_PTR(v);
2408         ndims = ARR_NDIM(v);
2409         dims = ARR_DIMS(v);
2410         nitems = ArrayGetNItems(ndims, dims);
2411
2412         /* if there are no elements, return an empty string */
2413         if (nitems == 0)
2414                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2415
2416         element_type = ARR_ELEMTYPE(v);
2417
2418         /*
2419          * We arrange to look up info about element type, including its output
2420          * conversion proc, only once per series of calls, assuming the
2421          * element type doesn't change underneath us.
2422          */
2423         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2424         if (my_extra == NULL)
2425         {
2426                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2427                                                                                                  sizeof(ArrayMetaState));
2428                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2429                 my_extra->element_type = InvalidOid;
2430         }
2431
2432         if (my_extra->element_type != element_type)
2433         {
2434                 /*
2435                  * Get info about element type, including its output conversion
2436                  * proc
2437                  */
2438                 get_type_io_data(element_type, IOFunc_output,
2439                                                  &my_extra->typlen, &my_extra->typbyval,
2440                                                  &my_extra->typalign, &my_extra->typdelim,
2441                                                  &my_extra->typioparam, &my_extra->typiofunc);
2442                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2443                                           fcinfo->flinfo->fn_mcxt);
2444                 my_extra->element_type = element_type;
2445         }
2446         typlen = my_extra->typlen;
2447         typbyval = my_extra->typbyval;
2448         typalign = my_extra->typalign;
2449
2450         for (i = 0; i < nitems; i++)
2451         {
2452                 Datum           itemvalue;
2453                 char       *value;
2454
2455                 itemvalue = fetch_att(p, typbyval, typlen);
2456
2457                 value = DatumGetCString(FunctionCall1(&my_extra->proc,
2458                                                                                           itemvalue));
2459
2460                 if (i > 0)
2461                         appendStringInfo(result_str, "%s%s", fldsep, value);
2462                 else
2463                         appendStringInfoString(result_str, value);
2464
2465                 p = att_addlength(p, typlen, PointerGetDatum(p));
2466                 p = (char *) att_align(p, typalign);
2467         }
2468
2469         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2470 }
2471
2472 #define HEXBASE 16
2473 /*
2474  * Convert a int32 to a string containing a base 16 (hex) representation of
2475  * the number.
2476  */
2477 Datum
2478 to_hex32(PG_FUNCTION_ARGS)
2479 {
2480         uint32          value = (uint32) PG_GETARG_INT32(0);
2481         text       *result_text;
2482         char       *ptr;
2483         const char *digits = "0123456789abcdef";
2484         char            buf[32];                /* bigger than needed, but reasonable */
2485
2486         ptr = buf + sizeof(buf) - 1;
2487         *ptr = '\0';
2488
2489         do
2490         {
2491                 *--ptr = digits[value % HEXBASE];
2492                 value /= HEXBASE;
2493         } while (ptr > buf && value);
2494
2495         result_text = PG_STR_GET_TEXT(ptr);
2496         PG_RETURN_TEXT_P(result_text);
2497 }
2498
2499 /*
2500  * Convert a int64 to a string containing a base 16 (hex) representation of
2501  * the number.
2502  */
2503 Datum
2504 to_hex64(PG_FUNCTION_ARGS)
2505 {
2506         uint64          value = (uint64) PG_GETARG_INT64(0);
2507         text       *result_text;
2508         char       *ptr;
2509         const char *digits = "0123456789abcdef";
2510         char            buf[32];                /* bigger than needed, but reasonable */
2511
2512         ptr = buf + sizeof(buf) - 1;
2513         *ptr = '\0';
2514
2515         do
2516         {
2517                 *--ptr = digits[value % HEXBASE];
2518                 value /= HEXBASE;
2519         } while (ptr > buf && value);
2520
2521         result_text = PG_STR_GET_TEXT(ptr);
2522         PG_RETURN_TEXT_P(result_text);
2523 }
2524
2525 /*
2526  * Create an md5 hash of a text string and return it as hex
2527  *
2528  * md5 produces a 16 byte (128 bit) hash; double it for hex
2529  */
2530 #define MD5_HASH_LEN  32
2531
2532 Datum
2533 md5_text(PG_FUNCTION_ARGS)
2534 {
2535         text       *in_text = PG_GETARG_TEXT_P(0);
2536         size_t          len;
2537         char        hexsum[MD5_HASH_LEN + 1];
2538         text       *result_text;
2539
2540         /* Calculate the length of the buffer using varlena metadata */
2541         len = VARSIZE(in_text) - VARHDRSZ;
2542
2543         /* get the hash result */
2544         if (md5_hash(VARDATA(in_text), len, hexsum) == false)
2545                 ereport(ERROR,
2546                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2547                                  errmsg("out of memory")));
2548
2549         /* convert to text and return it */
2550         result_text = PG_STR_GET_TEXT(hexsum);
2551         PG_RETURN_TEXT_P(result_text);
2552 }
2553
2554 /*
2555  * Create an md5 hash of a bytea field and return it as a hex string:
2556  * 16-byte md5 digest is represented in 32 hex characters.
2557  */
2558 Datum
2559 md5_bytea(PG_FUNCTION_ARGS)
2560 {
2561         bytea      *in = PG_GETARG_BYTEA_P(0);
2562         size_t          len;
2563         char            hexsum[MD5_HASH_LEN + 1];
2564         text       *result_text;
2565
2566         len = VARSIZE(in) - VARHDRSZ;
2567         if (md5_hash(VARDATA(in), len, hexsum) == false)
2568                 ereport(ERROR,
2569                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2570                                  errmsg("out of memory")));
2571
2572         result_text = PG_STR_GET_TEXT(hexsum);
2573         PG_RETURN_TEXT_P(result_text);
2574 }
2575
2576 /* 
2577  * Return the length of a datum, possibly compressed
2578  */
2579 Datum
2580 pg_column_size(PG_FUNCTION_ARGS)
2581 {
2582         Datum                   value = PG_GETARG_DATUM(0);
2583         int                             result;
2584
2585         /*      fn_extra stores the fixed column length, or -1 for varlena. */
2586         if (fcinfo->flinfo->fn_extra == NULL)   /* first call? */
2587         {
2588                 /* On the first call lookup the datatype of the supplied argument */
2589                 Oid                             argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
2590                 int                             typlen    = get_typlen(argtypeid);
2591
2592                 
2593                 if (typlen == 0)
2594                 {
2595                         /* Oid not in pg_type, should never happen. */
2596                         elog(ERROR, "cache lookup failed for type %u", argtypeid);
2597                 }
2598
2599                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2600                                                                                                           sizeof(int));
2601                 *(int *)fcinfo->flinfo->fn_extra = typlen;
2602         }
2603
2604         if (*(int *)fcinfo->flinfo->fn_extra != -1)
2605                 PG_RETURN_INT32(*(int *)fcinfo->flinfo->fn_extra);
2606         else
2607         {
2608                 result = toast_datum_size(value) - VARHDRSZ;
2609                 PG_RETURN_INT32(result);
2610         }
2611 }