]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varchar.c
Fix for multi-byte includes.
[postgresql] / src / backend / utils / adt / varchar.c
1 /*-------------------------------------------------------------------------
2  *
3  * varchar.c
4  *        Functions for the built-in type char() and varchar().
5  *
6  * Copyright (c) 1994, Regents of the University of California
7  *
8  *
9  * IDENTIFICATION
10  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.53 1999/07/17 16:25:25 momjian Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include <stdio.h>
15 #include <string.h>
16
17 #include "postgres.h"
18 #include "access/htup.h"
19 #include "catalog/pg_type.h"
20 #include "utils/builtins.h"
21
22 #ifdef MULTIBYTE
23 #include "mb/pg_wchar.h"
24 #endif
25
26 #ifdef CYR_RECODE
27 char       *convertstr(char *, int, int);
28 #endif
29
30
/*
 * CHAR() and VARCHAR() types are part of the ANSI SQL standard. CHAR()
 * is for blank-padded strings whose length is specified in CREATE TABLE.
 * VARCHAR is for storing strings whose length is at most the length
 * specified at CREATE TABLE time.
 *
 * It's hard to implement these types because we cannot figure out the
 * length of the type from the type itself. I changed (hopefully all) the
 * fmgr calls that invoke input functions of a data type to supply the
 * length also. (E.g., in INSERTs, we have the tupleDescriptor which contains
 * the length of the attributes and hence the exact length of the char() or
 * varchar(). We pass this to bpcharin() or varcharin().) In the case where
 * we cannot determine the length, we pass in -1 instead and the input string
 * must be null-terminated.
 *
 * We actually implement this as a varlena so that we don't have to pass in
 * the length for the comparison functions. (The difference from "text"
 * is that we truncate and possibly blank-pad the string at insertion time.)
 *
 *								- ay 6/95
 */
52
53
54 /*****************************************************************************
55  *       bpchar - char()                                                                                                                 *
56  *****************************************************************************/
57
58 /*
59  * bpcharin -
60  *        converts a string of char() type to the internal representation.
61  *        len is the length specified in () plus VARHDRSZ bytes. (XXX dummy is here
62  *        because we pass typelem as the second argument for array_in.)
63  */
64 char *
65 bpcharin(char *s, int dummy, int32 atttypmod)
66 {
67         char       *result,
68                            *r;
69         int                     len;
70         int                     i;
71
72         if (s == NULL)
73                 return (char *) NULL;
74
75         if (atttypmod == -1)
76         {
77
78                 /*
79                  * this is here because some functions can't supply the atttypmod
80                  */
81                 len = strlen(s);
82                 atttypmod = len + VARHDRSZ;
83         }
84         else
85                 len = atttypmod - VARHDRSZ;
86
87         if (len > MaxAttrSize)
88                 elog(ERROR, "bpcharin: length of char() must be less than %d",
89                                 MaxAttrSize);
90
91         result = (char *) palloc(atttypmod);
92         VARSIZE(result) = atttypmod;
93         r = VARDATA(result);
94         for (i = 0; i < len; i++, r++, s++)
95         {
96                 *r = *s;
97                 if (*r == '\0')
98                         break;
99         }
100
101 #ifdef CYR_RECODE
102         convertstr(result + VARHDRSZ, len, 0);
103 #endif
104
105         /* blank pad the string if necessary */
106         for (; i < len; i++)
107                 *r++ = ' ';
108         return result;
109 }
110
111 char *
112 bpcharout(char *s)
113 {
114         char       *result;
115         int                     len;
116
117         if (s == NULL)
118         {
119                 result = (char *) palloc(2);
120                 result[0] = '-';
121                 result[1] = '\0';
122         }
123         else
124         {
125                 len = VARSIZE(s) - VARHDRSZ;
126                 result = (char *) palloc(len + 1);
127                 StrNCpy(result, VARDATA(s), len + 1);   /* these are blank-padded */
128         }
129
130 #ifdef CYR_RECODE
131         convertstr(result, len, 1);
132 #endif
133
134         return result;
135 }
136
137 /* bpchar()
138  * Converts a char() type to a specific internal length.
139  * len is the length specified in () plus VARHDRSZ bytes.
140  */
141 char *
142 bpchar(char *s, int32 len)
143 {
144         char       *result,
145                            *r;
146         int                     rlen,
147                                 slen;
148         int                     i;
149
150         if (s == NULL)
151                 return (char *) NULL;
152
153         if ((len == -1) || (len == VARSIZE(s)))
154                 return s;
155
156         rlen = len - VARHDRSZ;
157
158         if (rlen > MaxAttrSize)
159                 elog(ERROR, "bpchar: length of char() must be less than %d",
160                         MaxAttrSize);
161
162 #ifdef STRINGDEBUG
163         printf("bpchar- convert string length %d (%d) ->%d (%d)\n",
164                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), rlen, len);
165 #endif
166
167         result = (char *) palloc(len);
168         VARSIZE(result) = len;
169         r = VARDATA(result);
170 #ifdef MULTIBYTE
171
172         /*
173          * truncate multi-byte string in a way not to break multi-byte
174          * boundary
175          */
176         if (VARSIZE(s) > len)
177                 slen = pg_mbcliplen(VARDATA(s), VARSIZE(s) - VARHDRSZ, rlen);
178         else
179                 slen = VARSIZE(s) - VARHDRSZ;
180 #else
181         slen = VARSIZE(s) - VARHDRSZ;
182 #endif
183         s = VARDATA(s);
184
185 #ifdef STRINGDEBUG
186         printf("bpchar- string is '");
187 #endif
188
189         for (i = 0; (i < rlen) && (i < slen); i++)
190         {
191                 if (*s == '\0')
192                         break;
193
194 #ifdef STRINGDEBUG
195                 printf("%c", *s);
196 #endif
197
198                 *r++ = *s++;
199         }
200
201 #ifdef STRINGDEBUG
202         printf("'\n");
203 #endif
204
205         /* blank pad the string if necessary */
206         for (; i < rlen; i++)
207                 *r++ = ' ';
208
209         return result;
210 }       /* bpchar() */
211
212 /* _bpchar()
213  * Converts an array of char() type to a specific internal length.
214  * len is the length specified in () plus VARHDRSZ bytes.
215  */
216 ArrayType  *
217 _bpchar(ArrayType *v, int32 len)
218 {
219         return array_map(v, BPCHAROID, bpchar, BPCHAROID, 1, len);
220 }
221
222
223 /* bpchar_char()
224  * Convert bpchar(1) to char.
225  */
226 int32
227 bpchar_char(char *s)
228 {
229         return (int32) *VARDATA(s);
230 }       /* bpchar_char() */
231
232 /* char_bpchar()
233  * Convert char to bpchar(1).
234  */
235 char *
236 char_bpchar(int32 c)
237 {
238         char       *result;
239
240         result = palloc(VARHDRSZ + 1);
241
242         VARSIZE(result) = VARHDRSZ + 1;
243         *(VARDATA(result)) = (char) c;
244
245         return result;
246 }       /* char_bpchar() */
247
248
249 /* bpchar_name()
250  * Converts a bpchar() type to a NameData type.
251  */
252 NameData   *
253 bpchar_name(char *s)
254 {
255         NameData   *result;
256         int                     len;
257
258         if (s == NULL)
259                 return NULL;
260
261         len = VARSIZE(s) - VARHDRSZ;
262         if (len > NAMEDATALEN)
263                 len = NAMEDATALEN;
264
265         while (len > 0)
266         {
267                 if (*(VARDATA(s) + len - 1) != ' ')
268                         break;
269                 len--;
270         }
271
272 #ifdef STRINGDEBUG
273         printf("bpchar- convert string length %d (%d) ->%d\n",
274                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
275 #endif
276
277         result = (NameData *) palloc(NAMEDATALEN);
278         StrNCpy(result->data, VARDATA(s), NAMEDATALEN);
279
280         /* now null pad to full length... */
281         while (len < NAMEDATALEN)
282         {
283                 *(result->data + len) = '\0';
284                 len++;
285         }
286
287         return result;
288 }       /* bpchar_name() */
289
290 /* name_bpchar()
291  * Converts a NameData type to a bpchar type.
292  */
293 char *
294 name_bpchar(NameData *s)
295 {
296         char       *result;
297         int                     len;
298
299         if (s == NULL)
300                 return NULL;
301
302         len = strlen(s->data);
303
304 #ifdef STRINGDEBUG
305         printf("bpchar- convert string length %d (%d) ->%d\n",
306                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
307 #endif
308
309         result = (char *) palloc(VARHDRSZ + len);
310         strncpy(VARDATA(result), s->data, len);
311         VARSIZE(result) = len + VARHDRSZ;
312
313         return result;
314 }       /* name_bpchar() */
315
316
317 /*****************************************************************************
318  *       varchar - varchar()                                                                                                     *
319  *****************************************************************************/
320
321 /*
322  * varcharin -
323  *        converts a string of varchar() type to the internal representation.
324  *        len is the length specified in () plus VARHDRSZ bytes. (XXX dummy is here
325  *        because we pass typelem as the second argument for array_in.)
326  */
327 char *
328 varcharin(char *s, int dummy, int32 atttypmod)
329 {
330         char       *result;
331         int                     len;
332
333         if (s == NULL)
334                 return (char *) NULL;
335
336         len = strlen(s) + VARHDRSZ;
337         if (atttypmod != -1 && len > atttypmod)
338                 len = atttypmod;                /* clip the string at max length */
339
340         if (len > MaxAttrSize)
341                 elog(ERROR, "varcharin: length of char() must be less than %d",
342                                 MaxAttrSize);
343
344         result = (char *) palloc(len);
345         VARSIZE(result) = len;
346         strncpy(VARDATA(result), s, len - VARHDRSZ);
347
348 #ifdef CYR_RECODE
349         convertstr(result + VARHDRSZ, len, 0);
350 #endif
351
352         return result;
353 }
354
355 char *
356 varcharout(char *s)
357 {
358         char       *result;
359         int                     len;
360
361         if (s == NULL)
362         {
363                 result = (char *) palloc(2);
364                 result[0] = '-';
365                 result[1] = '\0';
366         }
367         else
368         {
369                 len = VARSIZE(s) - VARHDRSZ;
370                 result = (char *) palloc(len + 1);
371                 StrNCpy(result, VARDATA(s), len + 1);
372         }
373
374 #ifdef CYR_RECODE
375         convertstr(result, len, 1);
376 #endif
377
378         return result;
379 }
380
381 /* varchar()
382  * Converts a varchar() type to the specified size.
383  * slen is the length specified in () plus VARHDRSZ bytes.
384  */
385 char *
386 varchar(char *s, int32 slen)
387 {
388         char       *result;
389         int                     len;
390
391         if (s == NULL)
392                 return (char *) NULL;
393
394         len = VARSIZE(s);
395         if ((slen == -1) || (len <= slen))
396                 return (char *) s;
397
398         /* only reach here if we need to truncate string... */
399
400 #ifdef MULTIBYTE
401
402         /*
403          * truncate multi-byte string in a way not to break multi-byte
404          * boundary
405          */
406         len = pg_mbcliplen(VARDATA(s), slen - VARHDRSZ, slen - VARHDRSZ);
407         slen = len + VARHDRSZ;
408 #else
409         len = slen - VARHDRSZ;
410 #endif
411
412         if (len > MaxAttrSize)
413                 elog(ERROR, "varchar: length of varchar() must be less than %d",
414                         MaxAttrSize);
415
416         result = (char *) palloc(slen);
417         VARSIZE(result) = slen;
418         strncpy(VARDATA(result), VARDATA(s), len);
419
420         return result;
421 }       /* varchar() */
422
423 /* _varchar()
424  * Converts an array of varchar() type to the specified size.
425  * len is the length specified in () plus VARHDRSZ bytes.
426  */
427 ArrayType  *
428 _varchar(ArrayType *v, int32 len)
429 {
430         return array_map(v, VARCHAROID, varchar, VARCHAROID, 1, len);
431 }
432
433
434 /*****************************************************************************
435  *      Comparison Functions used for bpchar
436  *****************************************************************************/
437
438 static int
439 bcTruelen(char *arg)
440 {
441         char       *s = VARDATA(arg);
442         int                     i;
443         int                     len;
444
445         len = VARSIZE(arg) - VARHDRSZ;
446         for (i = len - 1; i >= 0; i--)
447         {
448                 if (s[i] != ' ')
449                         break;
450         }
451         return i + 1;
452 }
453
454 int32
455 bpcharlen(char *arg)
456 {
457 #ifdef MULTIBYTE
458         unsigned char *s;
459         int                     len,
460                                 l,
461                                 wl;
462
463 #endif
464         if (!PointerIsValid(arg))
465                 elog(ERROR, "Bad (null) char() external representation", NULL);
466 #ifdef MULTIBYTE
467         l = bcTruelen(arg);
468         len = 0;
469         s = VARDATA(arg);
470         while (l > 0)
471         {
472                 wl = pg_mblen(s);
473                 l -= wl;
474                 s += wl;
475                 len++;
476         }
477         return (len);
478 #else
479         return bcTruelen(arg);
480 #endif
481 }
482
483 int32
484 bpcharoctetlen(char *arg)
485 {
486         if (!PointerIsValid(arg))
487                 elog(ERROR, "Bad (null) char() external representation", NULL);
488
489         return bcTruelen(arg);
490 }
491
492 bool
493 bpchareq(char *arg1, char *arg2)
494 {
495         int                     len1,
496                                 len2;
497
498         if (arg1 == NULL || arg2 == NULL)
499                 return (bool) 0;
500         len1 = bcTruelen(arg1);
501         len2 = bcTruelen(arg2);
502
503         if (len1 != len2)
504                 return 0;
505
506         return strncmp(VARDATA(arg1), VARDATA(arg2), len1) == 0;
507 }
508
509 bool
510 bpcharne(char *arg1, char *arg2)
511 {
512         int                     len1,
513                                 len2;
514
515         if (arg1 == NULL || arg2 == NULL)
516                 return (bool) 0;
517         len1 = bcTruelen(arg1);
518         len2 = bcTruelen(arg2);
519
520         if (len1 != len2)
521                 return 1;
522
523         return strncmp(VARDATA(arg1), VARDATA(arg2), len1) != 0;
524 }
525
526 bool
527 bpcharlt(char *arg1, char *arg2)
528 {
529         int                     len1,
530                                 len2;
531         int                     cmp;
532
533         if (arg1 == NULL || arg2 == NULL)
534                 return (bool) 0;
535         len1 = bcTruelen(arg1);
536         len2 = bcTruelen(arg2);
537
538         cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
539         if (cmp == 0)
540                 return len1 < len2;
541         else
542                 return cmp < 0;
543 }
544
545 bool
546 bpcharle(char *arg1, char *arg2)
547 {
548         int                     len1,
549                                 len2;
550         int                     cmp;
551
552         if (arg1 == NULL || arg2 == NULL)
553                 return (bool) 0;
554         len1 = bcTruelen(arg1);
555         len2 = bcTruelen(arg2);
556
557         cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
558         if (0 == cmp)
559                 return (bool) (len1 <= len2 ? 1 : 0);
560         else
561                 return (bool) (cmp <= 0);
562 }
563
564 bool
565 bpchargt(char *arg1, char *arg2)
566 {
567         int                     len1,
568                                 len2;
569         int                     cmp;
570
571         if (arg1 == NULL || arg2 == NULL)
572                 return (bool) 0;
573         len1 = bcTruelen(arg1);
574         len2 = bcTruelen(arg2);
575
576         cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
577         if (cmp == 0)
578                 return len1 > len2;
579         else
580                 return cmp > 0;
581 }
582
583 bool
584 bpcharge(char *arg1, char *arg2)
585 {
586         int                     len1,
587                                 len2;
588         int                     cmp;
589
590         if (arg1 == NULL || arg2 == NULL)
591                 return (bool) 0;
592         len1 = bcTruelen(arg1);
593         len2 = bcTruelen(arg2);
594
595         cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
596         if (0 == cmp)
597                 return (bool) (len1 >= len2 ? 1 : 0);
598         else
599                 return (bool) (cmp >= 0);
600 }
601
602 int32
603 bpcharcmp(char *arg1, char *arg2)
604 {
605         int                     len1,
606                                 len2;
607         int                     cmp;
608
609         len1 = bcTruelen(arg1);
610         len2 = bcTruelen(arg2);
611
612         cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
613         if ((0 == cmp) && (len1 != len2))
614                 return (int32) (len1 < len2 ? -1 : 1);
615         else
616                 return cmp;
617 }
618
619 /*****************************************************************************
620  *      Comparison Functions used for varchar
621  *****************************************************************************/
622
623 int32
624 varcharlen(char *arg)
625 {
626 #ifdef MULTIBYTE
627         unsigned char *s;
628         int                     len,
629                                 l,
630                                 wl;
631
632 #endif
633         if (!PointerIsValid(arg))
634                 elog(ERROR, "Bad (null) varchar() external representation", NULL);
635
636 #ifdef MULTIBYTE
637         len = 0;
638         s = VARDATA(arg);
639         l = VARSIZE(arg) - VARHDRSZ;
640         while (l > 0)
641         {
642                 wl = pg_mblen(s);
643                 l -= wl;
644                 s += wl;
645                 len++;
646         }
647         return (len);
648 #else
649         return VARSIZE(arg) - VARHDRSZ;
650 #endif
651 }
652
653 int32
654 varcharoctetlen(char *arg)
655 {
656         if (!PointerIsValid(arg))
657                 elog(ERROR, "Bad (null) varchar() external representation", NULL);
658         return VARSIZE(arg) - VARHDRSZ;
659 }
660
661 bool
662 varchareq(char *arg1, char *arg2)
663 {
664         int                     len1,
665                                 len2;
666
667         if (arg1 == NULL || arg2 == NULL)
668                 return (bool) 0;
669
670         len1 = VARSIZE(arg1) - VARHDRSZ;
671         len2 = VARSIZE(arg2) - VARHDRSZ;
672
673         if (len1 != len2)
674                 return 0;
675
676         return strncmp(VARDATA(arg1), VARDATA(arg2), len1) == 0;
677 }
678
679 bool
680 varcharne(char *arg1, char *arg2)
681 {
682         int                     len1,
683                                 len2;
684
685         if (arg1 == NULL || arg2 == NULL)
686                 return (bool) 0;
687         len1 = VARSIZE(arg1) - VARHDRSZ;
688         len2 = VARSIZE(arg2) - VARHDRSZ;
689
690         if (len1 != len2)
691                 return 1;
692
693         return strncmp(VARDATA(arg1), VARDATA(arg2), len1) != 0;
694 }
695
696 bool
697 varcharlt(char *arg1, char *arg2)
698 {
699         int                     len1,
700                                 len2;
701         int                     cmp;
702
703         if (arg1 == NULL || arg2 == NULL)
704                 return (bool) 0;
705         len1 = VARSIZE(arg1) - VARHDRSZ;
706         len2 = VARSIZE(arg2) - VARHDRSZ;
707
708         cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
709         if (cmp == 0)
710                 return len1 < len2;
711         else
712                 return cmp < 0;
713 }
714
715 bool
716 varcharle(char *arg1, char *arg2)
717 {
718         int                     len1,
719                                 len2;
720         int                     cmp;
721
722         if (arg1 == NULL || arg2 == NULL)
723                 return (bool) 0;
724         len1 = VARSIZE(arg1) - VARHDRSZ;
725         len2 = VARSIZE(arg2) - VARHDRSZ;
726
727         cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
728         if (0 == cmp)
729                 return (bool) (len1 <= len2 ? 1 : 0);
730         else
731                 return (bool) (cmp <= 0);
732 }
733
734 bool
735 varchargt(char *arg1, char *arg2)
736 {
737         int                     len1,
738                                 len2;
739         int                     cmp;
740
741         if (arg1 == NULL || arg2 == NULL)
742                 return (bool) 0;
743         len1 = VARSIZE(arg1) - VARHDRSZ;
744         len2 = VARSIZE(arg2) - VARHDRSZ;
745
746         cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
747         if (cmp == 0)
748                 return len1 > len2;
749         else
750                 return cmp > 0;
751 }
752
753 bool
754 varcharge(char *arg1, char *arg2)
755 {
756         int                     len1,
757                                 len2;
758         int                     cmp;
759
760         if (arg1 == NULL || arg2 == NULL)
761                 return (bool) 0;
762         len1 = VARSIZE(arg1) - VARHDRSZ;
763         len2 = VARSIZE(arg2) - VARHDRSZ;
764
765         cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
766         if (0 == cmp)
767                 return (bool) (len1 >= len2 ? 1 : 0);
768         else
769                 return (bool) (cmp >= 0);
770
771 }
772
773 int32
774 varcharcmp(char *arg1, char *arg2)
775 {
776         int                     len1,
777                                 len2;
778         int                     cmp;
779
780         len1 = VARSIZE(arg1) - VARHDRSZ;
781         len2 = VARSIZE(arg2) - VARHDRSZ;
782         cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
783         if ((0 == cmp) && (len1 != len2))
784                 return (int32) (len1 < len2 ? -1 : 1);
785         else
786                 return (int32) (cmp);
787 }
788
789 /*****************************************************************************
790  * Hash functions (modified from hashtext in access/hash/hashfunc.c)
791  *****************************************************************************/
792
793 uint32
794 hashbpchar(struct varlena * key)
795 {
796         int                     keylen;
797         char       *keydata;
798         uint32          n;
799         int                     loop;
800
801         keydata = VARDATA(key);
802         keylen = bcTruelen((char *) key);
803
804 #define HASHC   n = *keydata++ + 65599 * n
805
806         n = 0;
807         if (keylen > 0)
808         {
809                 loop = (keylen + 8 - 1) >> 3;
810
811                 switch (keylen & (8 - 1))
812                 {
813                         case 0:
814                                 do
815                                 {                               /* All fall throughs */
816                                         HASHC;
817                         case 7:
818                                         HASHC;
819                         case 6:
820                                         HASHC;
821                         case 5:
822                                         HASHC;
823                         case 4:
824                                         HASHC;
825                         case 3:
826                                         HASHC;
827                         case 2:
828                                         HASHC;
829                         case 1:
830                                         HASHC;
831                                 } while (--loop);
832                 }
833         }
834         return n;
835 }
836
837 uint32
838 hashvarchar(struct varlena * key)
839 {
840         int                     keylen;
841         char       *keydata;
842         uint32          n;
843         int                     loop;
844
845         keydata = VARDATA(key);
846         keylen = VARSIZE(key) - VARHDRSZ;
847
848 #define HASHC   n = *keydata++ + 65599 * n
849
850         n = 0;
851         if (keylen > 0)
852         {
853                 loop = (keylen + 8 - 1) >> 3;
854
855                 switch (keylen & (8 - 1))
856                 {
857                         case 0:
858                                 do
859                                 {                               /* All fall throughs */
860                                         HASHC;
861                         case 7:
862                                         HASHC;
863                         case 6:
864                                         HASHC;
865                         case 5:
866                                         HASHC;
867                         case 4:
868                                         HASHC;
869                         case 3:
870                                         HASHC;
871                         case 2:
872                                         HASHC;
873                         case 1:
874                                         HASHC;
875                                 } while (--loop);
876                 }
877         }
878         return n;
879 }