1 /*-------------------------------------------------------------------------
3 * Oracle compatible functions.
5 * Copyright (c) 1996-2003, PostgreSQL Global Development Group
7 * Author: Edmund Mergl <E.Mergl@bawue.de>
8 * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
12 * $Header: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v 1.45 2003/07/27 03:16:20 momjian Exp $
14 *-------------------------------------------------------------------------
20 #include "utils/builtins.h"
21 #include "mb/pg_wchar.h"
/*
 * Forward declaration of dotrim, the shared trimming helper that the
 * btrim, ltrim and rtrim entry points in this file call.
 */
24 static text *dotrim(const char *string, int stringlen,
25 const char *set, int setlen,
26 bool doltrim, bool dortrim);
29 /********************************************************************
35 * text lower(text string)
39 * Returns string, with all letters forced to lowercase.
41 ********************************************************************/
/*
 * lower -- lowercase conversion, invoked through PG_FUNCTION_ARGS.
 *
 * Takes a private copy of argument 0, overwrites the payload of that copy
 * in place with tolower() and returns the same copy.
 *
 * NOTE(review): this listing omits some lines (return type, braces,
 * declarations, loop header); the comments cover only visible statements.
 * NOTE(review): tolower() is applied through a char pointer, which looks
 * like a byte-at-a-time conversion; confirm how multibyte encodings are
 * meant to be handled.
 */
44 lower(PG_FUNCTION_ARGS)
46 text *string = PG_GETARG_TEXT_P_COPY(0);
50 /* Since we copied the string, we can scribble directly on the value */
51 ptr = VARDATA(string);
/* m is the payload size: total size minus the VARHDRSZ header bytes */
52 m = VARSIZE(string) - VARHDRSZ;
/* the unsigned char cast keeps the argument inside the range tolower() accepts */
56 *ptr = tolower((unsigned char) *ptr);
60 PG_RETURN_TEXT_P(string);
64 /********************************************************************
70 * text upper(text string)
74 * Returns string, with all letters forced to uppercase.
76 ********************************************************************/
/*
 * upper -- uppercase conversion, invoked through PG_FUNCTION_ARGS.
 *
 * Takes a private copy of argument 0, overwrites the payload of that copy
 * in place with toupper() and returns the same copy.
 *
 * NOTE(review): this listing omits some lines (return type, braces,
 * declarations, loop header); the comments cover only visible statements.
 * NOTE(review): toupper() is applied through a char pointer, which looks
 * like a byte-at-a-time conversion; confirm how multibyte encodings are
 * meant to be handled.
 */
79 upper(PG_FUNCTION_ARGS)
81 text *string = PG_GETARG_TEXT_P_COPY(0);
85 /* Since we copied the string, we can scribble directly on the value */
86 ptr = VARDATA(string);
/* m is the payload size: total size minus the VARHDRSZ header bytes */
87 m = VARSIZE(string) - VARHDRSZ;
/* the unsigned char cast keeps the argument inside the range toupper() accepts */
91 *ptr = toupper((unsigned char) *ptr);
95 PG_RETURN_TEXT_P(string);
99 /********************************************************************
105 * text initcap(text string)
109 * Returns string, with first letter of each word in uppercase,
110 * all other letters in lowercase. A word is delimited by white
113 ********************************************************************/
/*
 * initcap -- capitalize the first letter of each word, lowercase the rest.
 *
 * Works in place on a private copy of argument 0 and returns that copy.
 * A character is uppercased when it is the first one, or when the
 * character just before it is not alphanumeric; the remaining toupper and
 * tolower statements below implement that rule.
 *
 * NOTE(review): this listing omits some lines (return type, braces,
 * declarations, loop header, the else keyword); the comments cover only
 * visible statements.
 * NOTE(review): isalnum/toupper/tolower are applied through a char
 * pointer, which looks byte-at-a-time; confirm multibyte behavior.
 */
116 initcap(PG_FUNCTION_ARGS)
118 text *string = PG_GETARG_TEXT_P_COPY(0);
122 /* Since we copied the string, we can scribble directly on the value */
123 ptr = VARDATA(string);
/* m is the payload size: total size minus the VARHDRSZ header bytes */
124 m = VARSIZE(string) - VARHDRSZ;
/* first character: uppercased before any look-behind test is made */
128 *ptr = toupper((unsigned char) *ptr);
135 /* Oracle capitalizes after all non-alphanumeric */
/* ptr[-1] is the previous byte, so ptr must already be past the first one here */
136 if (!isalnum((unsigned char) ptr[-1]))
137 *ptr = toupper((unsigned char) *ptr);
139 *ptr = tolower((unsigned char) *ptr);
143 PG_RETURN_TEXT_P(string);
147 /********************************************************************
153 * text lpad(text string1, int4 len, text string2)
157 * Returns string1, left-padded to length len with the sequence of
158 * characters in string2. If len is less than the length of string1,
159 * instead truncate (on the right) to len.
161 ********************************************************************/
/*
 * lpad -- left-pad string1 to len characters using string2 as filler.
 *
 * Arguments: 0 = string1 (text), 1 = len (int4), 2 = string2 (text).
 * The result buffer is sized for the worst case (len characters times the
 * maximum byte width of the database encoding); the filler is written
 * first, then string1, and the final size is taken from how many bytes
 * were actually written.
 *
 * NOTE(review): this listing omits some lines (return type, braces,
 * declarations, conditions and loop headers); the comments cover only
 * visible statements.
 */
164 lpad(PG_FUNCTION_ARGS)
166 text *string1 = PG_GETARG_TEXT_P(0);
167 int32 len = PG_GETARG_INT32(1);
168 text *string2 = PG_GETARG_TEXT_P(2);
180 /* Negative len is silently taken as zero */
/* byte lengths of both payloads, i.e. total size minus VARHDRSZ */
184 s1len = VARSIZE(string1) - VARHDRSZ;
186 s1len = 0; /* shouldn't happen */
188 s2len = VARSIZE(string2) - VARHDRSZ;
190 s2len = 0; /* shouldn't happen */
/* from here on s1len counts characters rather than bytes */
192 s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);
195 s1len = len; /* truncate string1 to len chars */
198 len = s1len; /* nothing to pad with, so don't pad */
/*
 * NOTE(review): the overflow test below runs after the multiplication has
 * already been performed. If bytelen is a signed int (declaration not
 * visible here) the multiplication itself is undefined on overflow, so
 * the test may not be reliable. VARHDRSZ + bytelen in the palloc call is
 * not checked either. Confirm against the full source.
 */
200 bytelen = pg_database_encoding_max_length() * len;
202 /* check for integer overflow */
203 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
204 elog(ERROR, "Requested length too large");
206 ret = (text *) palloc(VARHDRSZ + bytelen);
/* filler pass: copy one character of string2 at a time into the result */
210 ptr2 = VARDATA(string2);
211 ptr2end = ptr2 + s2len;
212 ptr_ret = VARDATA(ret);
216 int mlen = pg_mblen(ptr2);
218 memcpy(ptr_ret, ptr2, mlen);
221 if (ptr2 == ptr2end) /* wrap around at end of s2 */
222 ptr2 = VARDATA(string2);
/* then copy the characters of string1 after the filler */
225 ptr1 = VARDATA(string1);
229 int mlen = pg_mblen(ptr1);
231 memcpy(ptr_ret, ptr1, mlen);
/* actual size: distance from the start of ret to the write position */
236 VARATT_SIZEP(ret) = ptr_ret - (char *) ret;
238 PG_RETURN_TEXT_P(ret);
242 /********************************************************************
248 * text rpad(text string1, int4 len, text string2)
252 * Returns string1, right-padded to length len with the sequence of
253 * characters in string2. If len is less than the length of string1,
254 * instead truncate (on the right) to len.
256 ********************************************************************/
/*
 * rpad -- right-pad string1 to len characters using string2 as filler.
 *
 * Arguments: 0 = string1 (text), 1 = len (int4), 2 = string2 (text).
 * The result buffer is sized for the worst case (len characters times the
 * maximum byte width of the database encoding); string1 is written first,
 * then the filler, and the final size is taken from how many bytes were
 * actually written.
 *
 * NOTE(review): this listing omits some lines (return type, braces,
 * declarations, conditions and loop headers); the comments cover only
 * visible statements.
 */
259 rpad(PG_FUNCTION_ARGS)
261 text *string1 = PG_GETARG_TEXT_P(0);
262 int32 len = PG_GETARG_INT32(1);
263 text *string2 = PG_GETARG_TEXT_P(2);
275 /* Negative len is silently taken as zero */
/* byte lengths of both payloads, i.e. total size minus VARHDRSZ */
279 s1len = VARSIZE(string1) - VARHDRSZ;
281 s1len = 0; /* shouldn't happen */
283 s2len = VARSIZE(string2) - VARHDRSZ;
285 s2len = 0; /* shouldn't happen */
/* from here on s1len counts characters rather than bytes */
287 s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);
290 s1len = len; /* truncate string1 to len chars */
293 len = s1len; /* nothing to pad with, so don't pad */
/*
 * NOTE(review): the overflow test below runs after the multiplication has
 * already been performed. If bytelen is a signed int (declaration not
 * visible here) the multiplication itself is undefined on overflow, so
 * the test may not be reliable. VARHDRSZ + bytelen in the palloc call is
 * not checked either. Confirm against the full source.
 */
295 bytelen = pg_database_encoding_max_length() * len;
297 /* Check for integer overflow */
298 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
299 elog(ERROR, "Requested length too large");
301 ret = (text *) palloc(VARHDRSZ + bytelen);
/* first pass: copy the characters of string1 into the result */
304 ptr1 = VARDATA(string1);
305 ptr_ret = VARDATA(ret);
309 int mlen = pg_mblen(ptr1);
311 memcpy(ptr_ret, ptr1, mlen);
/* filler pass: append one character of string2 at a time */
316 ptr2 = VARDATA(string2);
317 ptr2end = ptr2 + s2len;
321 int mlen = pg_mblen(ptr2);
323 memcpy(ptr_ret, ptr2, mlen);
326 if (ptr2 == ptr2end) /* wrap around at end of s2 */
327 ptr2 = VARDATA(string2);
/* actual size: distance from the start of ret to the write position */
330 VARATT_SIZEP(ret) = ptr_ret - (char *) ret;
332 PG_RETURN_TEXT_P(ret);
336 /********************************************************************
342 * text btrim(text string, text set)
346 * Returns string with characters removed from the front and back
347 * up to the first character not in set.
349 ********************************************************************/
/*
 * btrim -- thin wrapper that hands the payload pointers and byte lengths
 * of string (argument 0) and set (argument 1) to dotrim and returns the
 * text that dotrim builds.
 *
 * NOTE(review): the trailing flag arguments of the dotrim call are not
 * visible in this listing.
 */
352 btrim(PG_FUNCTION_ARGS)
354 text *string = PG_GETARG_TEXT_P(0);
355 text *set = PG_GETARG_TEXT_P(1);
358 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
359 VARDATA(set), VARSIZE(set) - VARHDRSZ,
362 PG_RETURN_TEXT_P(ret);
365 /********************************************************************
367 * btrim1 --- btrim with set fixed as ' '
369 ********************************************************************/
/*
 * btrim1 -- one-argument variant of btrim: passes the payload of string
 * (argument 0) to dotrim and returns the text that dotrim builds.
 *
 * NOTE(review): the set and flag arguments of the dotrim call are not
 * visible in this listing.
 */
372 btrim1(PG_FUNCTION_ARGS)
374 text *string = PG_GETARG_TEXT_P(0);
377 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
381 PG_RETURN_TEXT_P(ret);
385 * Common implementation for btrim, ltrim, rtrim
/*
 * dotrim -- shared worker for the trim entry points.
 *
 * string/stringlen: bytes to trim and their count.
 * set/setlen:       bytes holding the characters to strip, and their count.
 * doltrim/dortrim:  presumably enable stripping at the left and right end
 *                   respectively (the tests on them are not visible here).
 *
 * Returns a newly palloc-ed text holding the stringlen bytes that remain
 * at string once trimming is done.
 *
 * NOTE(review): this listing omits some lines (return type, braces,
 * declarations, several loop headers and pointer updates); the comments
 * cover only visible statements.
 */
388 dotrim(const char *string, int stringlen,
389 const char *set, int setlen,
390 bool doltrim, bool dortrim)
395 /* Nothing to do if either string or set is empty */
396 if (stringlen > 0 && setlen > 0)
398 if (pg_database_encoding_max_length() > 1)
401 * In the multibyte-encoding case, build arrays of pointers to
402 * character starts, so that we can avoid inefficient checks in
405 const char **stringchars;
406 const char **setchars;
/* one slot per byte of string: an upper bound on its character count */
419 stringchars = (const char **) palloc(stringlen * sizeof(char *));
420 stringmblen = (int *) palloc(stringlen * sizeof(int));
/* record where each character starts and how many bytes it occupies */
426 stringchars[stringnchars] = p;
427 stringmblen[stringnchars] = mblen = pg_mblen(p);
/* same bookkeeping for the characters of set */
433 setchars = (const char **) palloc(setlen * sizeof(char *));
434 setmblen = (int *) palloc(setlen * sizeof(int));
440 setchars[setnchars] = p;
441 setmblen[setnchars] = mblen = pg_mblen(p);
447 resultndx = 0; /* index in stringchars[] */
448 resultnchars = stringnchars;
/* front scan: test the character at resultndx against every set member */
452 while (resultnchars > 0)
454 str_pos = stringchars[resultndx];
455 str_len = stringmblen[resultndx];
456 for (i = 0; i < setnchars; i++)
/* a match needs the same byte length and identical bytes */
458 if (str_len == setmblen[i] &&
459 memcmp(str_pos, setchars[i], str_len) == 0)
463 break; /* no match here */
465 stringlen -= str_len;
/* back scan: test the last remaining character against every set member */
473 while (resultnchars > 0)
475 str_pos = stringchars[resultndx + resultnchars - 1];
476 str_len = stringmblen[resultndx + resultnchars - 1];
477 for (i = 0; i < setnchars; i++)
479 if (str_len == setmblen[i] &&
480 memcmp(str_pos, setchars[i], str_len) == 0)
484 break; /* no match here */
485 stringlen -= str_len;
498 * In the single-byte-encoding case, we don't need such overhead.
/* front scan on single bytes */
502 while (stringlen > 0)
504 char str_ch = *string;
506 for (i = 0; i < setlen; i++)
508 if (str_ch == set[i])
512 break; /* no match here */
/* back scan on single bytes */
520 while (stringlen > 0)
522 char str_ch = string[stringlen - 1];
524 for (i = 0; i < setlen; i++)
526 if (str_ch == set[i])
530 break; /* no match here */
537 /* Return selected portion of string */
538 result = (text *) palloc(VARHDRSZ + stringlen);
539 VARATT_SIZEP(result) = VARHDRSZ + stringlen;
540 memcpy(VARDATA(result), string, stringlen);
545 /********************************************************************
551 * bytea byteatrim(bytea string, bytea set)
555 * Returns string with characters removed from the front and back
556 * up to the first character not in set.
558 * Cloned from btrim and modified as required.
559 ********************************************************************/
/*
 * byteatrim -- trim bytes found in set from both ends of a bytea value.
 *
 * Arguments: 0 = string (bytea), 1 = set (bytea).
 * Returns the input string itself when either payload is empty; otherwise
 * returns a newly palloc-ed bytea holding m bytes starting at ptr.
 *
 * NOTE(review): this listing omits the scanning loops (and the return
 * type, braces and declarations); how ptr and m are adjusted between the
 * setup and the final copy is not visible here.
 */
562 byteatrim(PG_FUNCTION_ARGS)
564 bytea *string = PG_GETARG_BYTEA_P(0);
565 bytea *set = PG_GETARG_BYTEA_P(1);
/* m starts out as the payload byte count of string */
573 if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
574 (VARSIZE(set) - VARHDRSZ) <= 0)
575 PG_RETURN_BYTEA_P(string);
/* ptr: first payload byte; end and end2: last payload byte of string and set */
577 ptr = VARDATA(string);
578 end = VARDATA(string) + VARSIZE(string) - VARHDRSZ - 1;
579 end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
/* build the result from the m bytes that remain at ptr */
611 ret = (bytea *) palloc(VARHDRSZ + m);
612 VARATT_SIZEP(ret) = VARHDRSZ + m;
613 memcpy(VARDATA(ret), ptr, m);
615 PG_RETURN_BYTEA_P(ret);
618 /********************************************************************
624 * text ltrim(text string, text set)
628 * Returns string with initial characters removed up to the first
629 * character not in set.
631 ********************************************************************/
/*
 * ltrim -- thin wrapper that hands the payload pointers and byte lengths
 * of string (argument 0) and set (argument 1) to dotrim and returns the
 * text that dotrim builds.
 *
 * NOTE(review): the trailing flag arguments of the dotrim call are not
 * visible in this listing.
 */
634 ltrim(PG_FUNCTION_ARGS)
636 text *string = PG_GETARG_TEXT_P(0);
637 text *set = PG_GETARG_TEXT_P(1);
640 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
641 VARDATA(set), VARSIZE(set) - VARHDRSZ,
644 PG_RETURN_TEXT_P(ret);
647 /********************************************************************
649 * ltrim1 --- ltrim with set fixed as ' '
651 ********************************************************************/
/*
 * ltrim1 -- one-argument variant of ltrim: passes the payload of string
 * (argument 0) to dotrim and returns the text that dotrim builds.
 *
 * NOTE(review): the set and flag arguments of the dotrim call are not
 * visible in this listing.
 */
654 ltrim1(PG_FUNCTION_ARGS)
656 text *string = PG_GETARG_TEXT_P(0);
659 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
663 PG_RETURN_TEXT_P(ret);
666 /********************************************************************
672 * text rtrim(text string, text set)
676 * Returns string with final characters removed after the last
677 * character not in set.
679 ********************************************************************/
/*
 * rtrim -- thin wrapper that hands the payload pointers and byte lengths
 * of string (argument 0) and set (argument 1) to dotrim and returns the
 * text that dotrim builds.
 *
 * NOTE(review): the trailing flag arguments of the dotrim call are not
 * visible in this listing.
 */
682 rtrim(PG_FUNCTION_ARGS)
684 text *string = PG_GETARG_TEXT_P(0);
685 text *set = PG_GETARG_TEXT_P(1);
688 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
689 VARDATA(set), VARSIZE(set) - VARHDRSZ,
692 PG_RETURN_TEXT_P(ret);
695 /********************************************************************
697 * rtrim1 --- rtrim with set fixed as ' '
699 ********************************************************************/
/*
 * rtrim1 -- one-argument variant of rtrim: passes the payload of string
 * (argument 0) to dotrim and returns the text that dotrim builds.
 *
 * NOTE(review): the set and flag arguments of the dotrim call are not
 * visible in this listing.
 */
702 rtrim1(PG_FUNCTION_ARGS)
704 text *string = PG_GETARG_TEXT_P(0);
707 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
711 PG_RETURN_TEXT_P(ret);
715 /********************************************************************
721 * text translate(text string, text from, text to)
725 * Returns string after replacing all occurrences of characters in from
726 * with the corresponding character in to. If from is longer than to,
727 * occurrences of the extra characters in from are deleted.
728 * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
730 ********************************************************************/
/*
 * translate -- map characters of string through the from/to tables.
 *
 * Arguments: 0 = string, 1 = from, 2 = to (all text).
 * For each character of string, from is searched for a character with the
 * same byte length and identical bytes. On a hit, the character at the
 * same position in to is copied to the output when that position exists;
 * on a miss, the source character is copied unchanged.
 * An empty string is returned as is.
 *
 * NOTE(review): this listing omits some lines (return type, braces,
 * declarations, the outer loop header and several pointer updates); the
 * comments cover only visible statements.
 */
733 translate(PG_FUNCTION_ARGS)
735 text *string = PG_GETARG_TEXT_P(0);
736 text *from = PG_GETARG_TEXT_P(1);
737 text *to = PG_GETARG_TEXT_P(2);
/* m is the payload byte count of string; nothing to do when it is empty */
755 if ((m = VARSIZE(string) - VARHDRSZ) <= 0)
756 PG_RETURN_TEXT_P(string);
758 fromlen = VARSIZE(from) - VARHDRSZ;
759 from_ptr = VARDATA(from);
760 tolen = VARSIZE(to) - VARHDRSZ;
761 to_ptr = VARDATA(to);
/*
 * Output buffer sizing. str_len is the total size of string, header
 * included. The estimate scales it by the byte-length ratio of to over
 * from and never goes below str_len.
 *
 * NOTE(review): nothing visible here guards against fromlen being zero,
 * in which case the division below is by zero. Also, the estimate is a
 * heuristic rather than a proven upper bound: when a replacement
 * character occupies more bytes than the character it replaces, the
 * output could exceed estimate_len and the copies below would write past
 * the allocation. Confirm against the full source and consider sizing
 * for the worst case instead.
 */
763 str_len = VARSIZE(string);
764 estimate_len = (tolen * 1.0 / fromlen + 0.5) * str_len;
765 estimate_len = estimate_len > str_len ? estimate_len : str_len;
766 result = (text *) palloc(estimate_len);
768 source = VARDATA(string);
769 target = VARDATA(result);
/* byte length of the character currently at source */
774 source_len = pg_mblen(source);
/* look for that character in from, stepping one character at a time */
777 for (i = 0; i < fromlen; i += len)
779 len = pg_mblen(&from_ptr[i]);
780 if (len == source_len &&
781 memcmp(source, &from_ptr[i], len) == 0)
/* walk to the corresponding position in to, stopping at its end */
791 for (i = 0; i < from_index; i++)
794 if (p >= (to_ptr + tolen))
/* copy a replacement only when to actually has a character at that position */
797 if (p < (to_ptr + tolen))
800 memcpy(target, p, len);
808 /* no match, so copy */
809 memcpy(target, source, source_len);
810 target += source_len;
811 retlen += source_len;
814 source += source_len;
/* final size: bytes written plus the header */
818 VARATT_SIZEP(result) = retlen + VARHDRSZ;
821 * There may be some wasted space in the result if deletions occurred,
822 * but it's not worth reallocating it; the function result probably
823 * won't live long anyway.
826 PG_RETURN_TEXT_P(result);
829 /********************************************************************
835 * int ascii(text string)
839 * Returns the decimal representation of the first character from
842 ********************************************************************/
/*
 * ascii -- return the numeric value of the first byte of string.
 *
 * The first payload byte is read as unsigned char and returned as int32,
 * so the result is never negative. An empty string takes the branch
 * below (its body is not visible in this listing).
 *
 * NOTE(review): only the first byte is examined, not the first character;
 * confirm whether that is intended for multibyte encodings.
 */
845 ascii(PG_FUNCTION_ARGS)
847 text *string = PG_GETARG_TEXT_P(0);
/* total size not above the header size means there is no payload */
849 if (VARSIZE(string) <= VARHDRSZ)
852 PG_RETURN_INT32((int32) *((unsigned char *) VARDATA(string)));
855 /********************************************************************
865 * Returns the character having the binary equivalent to val
867 ********************************************************************/
/*
 * chr -- build a one-byte text value from an integer.
 *
 * Allocates a text with a one-byte payload and stores cvalue in it after
 * a cast to char.
 *
 * NOTE(review): no range check on cvalue is visible in this listing; the
 * cast to char keeps only what fits in a char. Confirm whether values
 * outside the char range (or zero) should be rejected.
 */
870 chr(PG_FUNCTION_ARGS)
872 int32 cvalue = PG_GETARG_INT32(0);
875 result = (text *) palloc(VARHDRSZ + 1);
876 VARATT_SIZEP(result) = VARHDRSZ + 1;
877 *VARDATA(result) = (char) cvalue;
879 PG_RETURN_TEXT_P(result);
882 /********************************************************************
888 * text repeat(text string, int val)
892 * Returns string repeated val times.
894 ********************************************************************/
/*
 * repeat -- concatenate count copies of string.
 *
 * Arguments: 0 = string (text), 1 = count (int4).
 * Allocates a result of tlen bytes (header plus count times the payload
 * size of string) and copies the payload of string into it once per
 * iteration. Raises an error when the overflow check below fails.
 *
 * NOTE(review): this listing omits some lines (return type, braces,
 * declarations, the advance of cp); the comments cover only visible
 * statements. How a negative count is handled is not visible here.
 */
897 repeat(PG_FUNCTION_ARGS)
899 text *string = PG_GETARG_TEXT_P(0);
900 int32 count = PG_GETARG_INT32(1);
/* slen: payload bytes of string; tlen: total bytes of the result */
910 slen = (VARSIZE(string) - VARHDRSZ);
911 tlen = (VARHDRSZ + (count * slen));
/*
 * NOTE(review): tlen above is computed before this check runs, and the
 * check itself multiplies first and divides afterwards. If slen is a
 * signed int (declaration not visible here) an overflowing
 * multiplication is undefined, so the test may not be reliable. Confirm
 * and consider testing against a limit before multiplying.
 */
913 /* Check for integer overflow */
914 if (slen != 0 && count != 0)
916 int check = count * slen;
917 int check2 = check + VARHDRSZ;
919 if ((check / slen) != count || check2 <= check)
920 elog(ERROR, "Requested buffer is too large.");
923 result = (text *) palloc(tlen);
925 VARATT_SIZEP(result) = tlen;
/* cp is the write position inside the result payload */
926 cp = VARDATA(result);
927 for (i = 0; i < count; i++)
929 memcpy(cp, VARDATA(string), slen);
933 PG_RETURN_TEXT_P(result);