1 /*-------------------------------------------------------------------------
4 * PostgreSQL type definitions for ISNs (ISBN, ISMN, ISSN, EAN13, UPC)
6 * Copyright (c) 2004-2006, Germán Méndez Bravo (Kronuz)
7 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
10 * $PostgreSQL: pgsql/contrib/isn/isn.c,v 1.10 2009/01/01 17:23:32 momjian Exp $
12 *-------------------------------------------------------------------------
18 #include "utils/builtins.h"
29 #define MAXEAN13LEN 18
33 INVALID, ANY, EAN13, ISBN, ISMN, ISSN, UPC
36 static const char * const isn_names[] = {"EAN13/UPC/ISxN", "EAN13/UPC/ISxN", "EAN13", "ISBN", "ISMN", "ISSN", "UPC"};
38 static bool g_weak = false;
39 static bool g_initialized = false;
42 /***********************************************************************
44 ** Routines for EAN13/UPC/ISxNs.
47 ** In this code, a normalized string is one that is known to be a valid
48 ** ISxN number containing only digits and hyphens and with enough space
49 ** to hold the full 13 digits plus the maximum of four hyphens.
50 ***********************************************************************/
52 /*----------------------------------------------------------
54 *---------------------------------------------------------*/
57 * Check if the table and its index are correct (just for debugging)
61 check_table(const char *(*TABLE)[2], const unsigned TABLE_index[10][2])
74 if (TABLE == NULL || TABLE_index == NULL)
77 while (TABLE[i][0] && TABLE[i][1])
82 /* must always start with a digit: */
83 if (!isdigit((unsigned char) *aux1) || !isdigit((unsigned char) *aux2))
88 /* must always have the same format and length: */
89 while (*aux1 && *aux2)
91 if (!(isdigit((unsigned char) *aux1) &&
92 isdigit((unsigned char) *aux2)) &&
93 (*aux1 != *aux2 || *aux1 != '-'))
101 /* found a new range */
104 /* check current range in the index: */
105 for (j = x; j <= y; j++)
107 if (TABLE_index[j][0] != init)
109 if (TABLE_index[j][1] != i - init)
116 /* Always get the new limit */
126 elog(DEBUG1, "invalid table near {\"%s\", \"%s\"} (pos: %d)",
127 TABLE[i][0], TABLE[i][1], i);
131 elog(DEBUG1, "index %d is invalid", j);
134 #endif /* ISN_DEBUG */
136 /*----------------------------------------------------------
137 * Formatting and conversion routines.
138 *---------------------------------------------------------*/
141 dehyphenate(char *bufO, char *bufI)
147 if (isdigit((unsigned char) *bufI))
159 * hyphenate --- Try to hyphenate, in-place, the string starting at bufI
160 * into bufO using the given hyphenation range TABLE.
161 * Assumes the input string to be used is of only digits.
163 * Returns the number of characters actually hyphenated.
166 hyphenate(char *bufO, char *bufI, const char *(*TABLE)[2], const unsigned TABLE_index[10][2])
169 const char *ean_aux1,
182 /* just compress the string if no further hyphenation is required */
183 if (TABLE == NULL || TABLE_index == NULL)
194 /* add remaining hyphenations */
196 search = *bufI - '0';
197 upper = lower = TABLE_index[search][0];
198 upper += TABLE_index[search][1];
201 step = (upper - lower) / 2;
204 search = lower + step;
207 ean_in1 = ean_in2 = false;
208 ean_aux1 = TABLE[search][0];
209 ean_aux2 = TABLE[search][1];
212 if ((ean_in1 || *firstdig >= *ean_aux1) && (ean_in2 || *firstdig <= *ean_aux2))
214 if (*firstdig > *ean_aux1)
216 if (*firstdig < *ean_aux2)
218 if (ean_in1 && ean_in2)
221 firstdig++, ean_aux1++, ean_aux2++;
222 if (!(*ean_aux1 && *ean_aux2 && *firstdig))
224 if (!isdigit((unsigned char) *ean_aux1))
225 ean_aux1++, ean_aux2++;
230 * check in what direction we should go and move the pointer
233 if (*firstdig < *ean_aux1 && !ean_in1)
238 step = (upper - lower) / 2;
239 search = lower + step;
241 /* Initialize stuff again: */
243 ean_in1 = ean_in2 = false;
244 ean_aux1 = TABLE[search][0];
245 ean_aux2 = TABLE[search][1];
253 ean_p = TABLE[search][0];
254 while (*ean_p && *aux2)
263 *aux1 = *aux2; /* add a lookahead char */
270 * weight_checkdig -- Receives a buffer with a normalized ISxN string number,
271 * and the length to weight.
273 * Returns the weight of the number (the check digit value, 0-10)
276 weight_checkdig(char *isn, unsigned size)
280 while (*isn && size > 1)
282 if (isdigit((unsigned char) *isn))
284 weight += size-- * (*isn - '0');
288 weight = weight % 11;
290 weight = 11 - weight;
296 * checkdig --- Receives a buffer with a normalized ISxN string number,
297 * and the length to check.
299 * Returns the check digit value (0-9)
302 checkdig(char *num, unsigned size)
309 { /* ISMN start with 'M' */
313 while (*num && size > 1)
315 if (isdigit((unsigned char) *num))
318 check3 += *num - '0';
325 check = (check + 3 * check3) % 10;
332 * ean2isn --- Try to convert an ean13 number to a UPC/ISxN number.
333 * This doesn't verify for a valid check digit.
335 * If errorOK is false, ereport a useful error message if the ean13 is bad.
336 * If errorOK is true, just return "false" for bad input.
339 ean2isn(ean13 ean, bool errorOK, ean13 * result, enum isn_type accept)
341 enum isn_type type = INVALID;
343 char buf[MAXEAN13LEN + 1];
351 /* verify it's in the EAN13 range */
352 if (ean > UINT64CONST(9999999999999))
355 /* convert the number */
357 firstdig = aux = buf + 13;
358 *aux = '\0'; /* terminate string; aux points to last digit */
361 digval = (unsigned) (ean % 10); /* get the decimal value */
362 ean /= 10; /* get next digit */
363 *--aux = (char) (digval + '0'); /* convert to ascii and store */
364 } while (ean && search++ < 12);
365 while (search++ < 12)
366 *--aux = '0'; /* fill the remaining EAN13 with '0' */
368 /* find out the data type: */
369 if (!strncmp("978", buf, 3))
373 else if (!strncmp("977", buf, 3))
377 else if (!strncmp("9790", buf, 4))
381 else if (!strncmp("979", buf, 3))
385 else if (*buf == '0')
393 if (accept != ANY && accept != EAN13 && accept != type)
405 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
406 errmsg("cannot cast EAN13(%s) to %s for number: \"%s\"",
407 isn_names[type], isn_names[accept], buf)));
412 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
413 errmsg("cannot cast %s to %s for number: \"%s\"",
414 isn_names[type], isn_names[accept], buf)));
425 * Format the number separately to keep the machine-dependent format
426 * code out of the translatable message text
428 snprintf(eanbuf, sizeof(eanbuf), EAN13_FORMAT, ean);
430 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
431 errmsg("value \"%s\" is out of range for %s type",
432 eanbuf, isn_names[type])));
438 * ean2UPC/ISxN --- Convert in-place a normalized EAN13 string to the corresponding
439 * UPC/ISxN string number. Assumes the input string is normalized.
447 /* the number should come in this format: 978-0-000-00000-0 */
448 /* Strip the first part and calculate the new check digit */
449 hyphenate(isn, isn + 4, NULL, NULL);
450 check = weight_checkdig(isn, 10);
451 aux = strchr(isn, '\0');
452 while (!isdigit((unsigned char) *--aux));
462 /* the number should come in this format: 979-0-000-00000-0 */
463 /* Just strip the first part and change the first digit ('0') to 'M' */
464 hyphenate(isn, isn + 4, NULL, NULL);
473 /* the number should come in this format: 977-0000-000-00-0 */
474 /* Strip the first part, crop, and calculate the new check digit */
475 hyphenate(isn, isn + 4, NULL, NULL);
476 check = weight_checkdig(isn, 8);
480 isn[8] = check + '0';
487 /* the number should come in this format: 000-000000000-0 */
488 /* Strip the first part, crop, and dehyphenate */
489 dehyphenate(isn, isn + 1);
494 * str2ean --- Converts a string of digits into an ean13 number.
495 * Assumes the input string is a string with only digits
496 * on it, and that it's within the range of ean13.
498 * Returns the ean13 value of the string.
501 str2ean(const char *num)
503 ean13 ean = 0; /* current ean */
507 if (isdigit((unsigned char) *num))
508 ean = 10 * ean + (*num - '0');
511 return (ean << 1); /* also give room to a flag */
515 * ean2string --- Try to convert an ean13 number to a hyphenated string.
516 * Assumes there's enough space in result to hold
517 * the string (maximum MAXEAN13LEN+1 bytes)
518 * This doesn't verify for a valid check digit.
520 * If shortType is true, the returned string is in the old ISxN short format.
521 * If errorOK is false, ereport a useful error message if the string is bad.
522 * If errorOK is true, just return "false" for bad input.
525 ean2string(ean13 ean, bool errorOK, char *result, bool shortType)
527 const char *(*TABLE)[2];
528 const unsigned (*TABLE_index)[2];
529 enum isn_type type = INVALID;
535 char valid = '\0'; /* was the number initially written with a
536 * valid check digit? */
538 TABLE_index = ISBN_index;
543 /* verify it's in the EAN13 range */
544 if (ean > UINT64CONST(9999999999999))
547 /* convert the number */
549 firstdig = aux = result + MAXEAN13LEN;
550 *aux = '\0'; /* terminate string; aux points to last digit */
551 *--aux = valid; /* append '!' for numbers with invalid but
552 * corrected check digit */
555 digval = (unsigned) (ean % 10); /* get the decimal value */
556 ean /= 10; /* get next digit */
557 *--aux = (char) (digval + '0'); /* convert to ascii and store */
559 *--aux = '-'; /* the check digit is always there */
560 } while (ean && search++ < 13);
561 while (search++ < 13)
562 *--aux = '0'; /* fill the remaining EAN13 with '0' */
564 /* The string should be in this form: ???DDDDDDDDDDDD-D" */
565 search = hyphenate(result, result + 3, EAN13_range, EAN13_index);
567 /* verify it's a logically valid EAN13 */
570 search = hyphenate(result, result + 3, NULL, NULL);
574 /* find out what type of hyphenation is needed: */
575 if (!strncmp("978-", result, search))
577 /* The string should be in this form: 978-??000000000-0" */
580 TABLE_index = ISBN_index;
582 else if (!strncmp("977-", result, search))
584 /* The string should be in this form: 977-??000000000-0" */
587 TABLE_index = ISSN_index;
589 else if (!strncmp("979-0", result, search + 1))
591 /* The string should be in this form: 979-0?000000000-0" */
594 TABLE_index = ISMN_index;
596 else if (*result == '0')
598 /* The string should be in this form: 000-00000000000-0" */
601 TABLE_index = UPC_index;
610 /* verify it's a logically valid EAN13/UPC/ISxN */
612 search = hyphenate(result + digval, result + digval + 2, TABLE, TABLE_index);
614 /* verify it's a valid EAN13 */
617 search = hyphenate(result + digval, result + digval + 2, NULL, NULL);
622 /* convert to the old short type: */
649 * Format the number separately to keep the machine-dependent format
650 * code out of the translatable message text
652 snprintf(eanbuf, sizeof(eanbuf), EAN13_FORMAT, ean);
654 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
655 errmsg("value \"%s\" is out of range for %s type",
656 eanbuf, isn_names[type])));
662 * string2ean --- try to parse a string into an ean13.
664 * If errorOK is false, ereport a useful error message if the string is bad.
665 * If errorOK is true, just return "false" for bad input.
667 * if the input string ends with '!' it will always be treated as invalid
668 * (even if the check digit is valid)
671 string2ean(const char *str, bool errorOK, ean13 * result,
672 enum isn_type accept)
677 char *aux1 = buf + 3; /* leave space for the first part, in case
679 const char *aux2 = str;
680 enum isn_type type = INVALID;
682 rcheck = (unsigned) -1;
687 /* recognize and validate the number: */
688 while (*aux2 && length <= 13)
690 last = (*(aux2 + 1) == '!' || *(aux2 + 1) == '\0'); /* is the last character */
691 digit = (isdigit((unsigned char) *aux2) != 0); /* is current character
693 if (*aux2 == '?' && last) /* automagically calculate check digit
695 magic = digit = true;
696 if (length == 0 && (*aux2 == 'M' || *aux2 == 'm'))
698 /* only ISMN can be here */
705 else if (length == 7 && (digit || *aux2 == 'X' || *aux2 == 'x') && last)
707 /* only ISSN can be here */
711 *aux1++ = toupper((unsigned char) *aux2);
714 else if (length == 9 && (digit || *aux2 == 'X' || *aux2 == 'x') && last)
716 /* only ISBN and ISMN can be here */
717 if (type != INVALID && type != ISMN)
720 type = ISBN; /* ISMN must start with 'M' */
721 *aux1++ = toupper((unsigned char) *aux2);
724 else if (length == 11 && digit && last)
726 /* only UPC can be here */
733 else if (*aux2 == '-' || *aux2 == ' ')
735 /* skip, we could validate but I think it's worthless */
737 else if (*aux2 == '!' && *(aux2 + 1) == '\0')
739 /* the invalid check digit suffix was found, set it */
756 *aux1 = '\0'; /* terminate the string */
758 /* find the current check digit value */
761 /* only EAN13 can be here */
765 check = buf[15] - '0';
767 else if (length == 12)
769 /* only UPC can be here */
772 check = buf[14] - '0';
774 else if (length == 10)
776 if (type != ISBN && type != ISMN)
781 check = buf[12] - '0';
783 else if (length == 8)
785 if (type != INVALID && type != ISSN)
791 check = buf[10] - '0';
799 /* obtain the real check digit value, validate, and convert to ean13: */
800 if (accept == EAN13 && type != accept)
802 if (accept != ANY && type != EAN13 && type != accept)
807 valid = (valid && ((rcheck = checkdig(buf + 3, 13)) == check || magic));
808 /* now get the subtype of EAN13: */
811 else if (!strncmp("977", buf + 3, 3))
813 else if (!strncmp("978", buf + 3, 3))
815 else if (!strncmp("9790", buf + 3, 4))
817 else if (!strncmp("979", buf + 3, 3))
819 if (accept != EAN13 && accept != ANY && type != accept)
823 strncpy(buf, "9790", 4); /* this isn't for sure yet, for now
824 * ISMN it's only 9790 */
825 valid = (valid && ((rcheck = checkdig(buf + 3, 10)) == check || magic));
828 strncpy(buf, "978", 3);
829 valid = (valid && ((rcheck = weight_checkdig(buf + 3, 10)) == check || magic));
832 strncpy(buf + 10, "00", 2); /* append 00 as the normal issue
833 * publication code */
834 strncpy(buf, "977", 3);
835 valid = (valid && ((rcheck = weight_checkdig(buf + 3, 8)) == check || magic));
839 valid = (valid && ((rcheck = checkdig(buf + 2, 13)) == check || magic));
844 /* fix the check digit: */
845 for (aux1 = buf; *aux1 && *aux1 <= ' '; aux1++);
846 aux1[12] = checkdig(aux1, 13) + '0';
849 if (!valid && !magic)
852 *result = str2ean(aux1);
853 *result |= valid ? 0 : 1;
858 { /* weak input mode is activated: */
859 /* set the "invalid-check-digit-on-input" flag */
860 *result = str2ean(aux1);
867 if (rcheck == (unsigned) -1)
870 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
871 errmsg("invalid %s number: \"%s\"",
872 isn_names[accept], str)));
877 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
878 errmsg("invalid check digit for %s number: \"%s\", should be %c",
879 isn_names[accept], str, (rcheck == 10) ? ('X') : (rcheck + '0'))));
887 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
888 errmsg("invalid input syntax for %s number: \"%s\"",
889 isn_names[accept], str)));
895 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
896 errmsg("cannot cast %s to %s for number: \"%s\"",
897 isn_names[type], isn_names[accept], str)));
903 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
904 errmsg("value \"%s\" is out of range for %s type",
905 str, isn_names[accept])));
909 /*----------------------------------------------------------
911 *---------------------------------------------------------*/
917 if (!check_table(EAN13, EAN13_index))
918 elog(LOG, "EAN13 failed check");
919 if (!check_table(ISBN, ISBN_index))
920 elog(LOG, "ISBN failed check");
921 if (!check_table(ISMN, ISMN_index))
922 elog(LOG, "ISMN failed check");
923 if (!check_table(ISSN, ISSN_index))
924 elog(LOG, "ISSN failed check");
925 if (!check_table(UPC, UPC_index))
926 elog(LOG, "UPC failed check");
928 g_initialized = true;
933 PG_FUNCTION_INFO_V1(isn_out);
935 isn_out(PG_FUNCTION_ARGS)
937 ean13 val = PG_GETARG_EAN13(0);
939 char buf[MAXEAN13LEN + 1];
941 (void) ean2string(val, false, buf, true);
943 result = pstrdup(buf);
944 PG_RETURN_CSTRING(result);
949 PG_FUNCTION_INFO_V1(ean13_out);
951 ean13_out(PG_FUNCTION_ARGS)
953 ean13 val = PG_GETARG_EAN13(0);
955 char buf[MAXEAN13LEN + 1];
957 (void) ean2string(val, false, buf, false);
959 result = pstrdup(buf);
960 PG_RETURN_CSTRING(result);
965 PG_FUNCTION_INFO_V1(ean13_in);
967 ean13_in(PG_FUNCTION_ARGS)
969 const char *str = PG_GETARG_CSTRING(0);
972 (void) string2ean(str, false, &result, EAN13);
973 PG_RETURN_EAN13(result);
978 PG_FUNCTION_INFO_V1(isbn_in);
980 isbn_in(PG_FUNCTION_ARGS)
982 const char *str = PG_GETARG_CSTRING(0);
985 (void) string2ean(str, false, &result, ISBN);
986 PG_RETURN_EAN13(result);
991 PG_FUNCTION_INFO_V1(ismn_in);
993 ismn_in(PG_FUNCTION_ARGS)
995 const char *str = PG_GETARG_CSTRING(0);
998 (void) string2ean(str, false, &result, ISMN);
999 PG_RETURN_EAN13(result);
1004 PG_FUNCTION_INFO_V1(issn_in);
1006 issn_in(PG_FUNCTION_ARGS)
1008 const char *str = PG_GETARG_CSTRING(0);
1011 (void) string2ean(str, false, &result, ISSN);
1012 PG_RETURN_EAN13(result);
1017 PG_FUNCTION_INFO_V1(upc_in);
1019 upc_in(PG_FUNCTION_ARGS)
1021 const char *str = PG_GETARG_CSTRING(0);
1024 (void) string2ean(str, false, &result, UPC);
1025 PG_RETURN_EAN13(result);
1028 /* casting functions
1030 PG_FUNCTION_INFO_V1(isbn_cast_from_ean13);
1032 isbn_cast_from_ean13(PG_FUNCTION_ARGS)
1034 ean13 val = PG_GETARG_EAN13(0);
1037 (void) ean2isn(val, false, &result, ISBN);
1039 PG_RETURN_EAN13(result);
1042 PG_FUNCTION_INFO_V1(ismn_cast_from_ean13);
1044 ismn_cast_from_ean13(PG_FUNCTION_ARGS)
1046 ean13 val = PG_GETARG_EAN13(0);
1049 (void) ean2isn(val, false, &result, ISMN);
1051 PG_RETURN_EAN13(result);
1054 PG_FUNCTION_INFO_V1(issn_cast_from_ean13);
1056 issn_cast_from_ean13(PG_FUNCTION_ARGS)
1058 ean13 val = PG_GETARG_EAN13(0);
1061 (void) ean2isn(val, false, &result, ISSN);
1063 PG_RETURN_EAN13(result);
1066 PG_FUNCTION_INFO_V1(upc_cast_from_ean13);
1068 upc_cast_from_ean13(PG_FUNCTION_ARGS)
1070 ean13 val = PG_GETARG_EAN13(0);
1073 (void) ean2isn(val, false, &result, UPC);
1075 PG_RETURN_EAN13(result);
1079 /* is_valid - returns false if the "invalid-check-digit-on-input" is set
1081 PG_FUNCTION_INFO_V1(is_valid);
1083 is_valid(PG_FUNCTION_ARGS)
1085 ean13 val = PG_GETARG_EAN13(0);
1087 PG_RETURN_BOOL((val & 1) == 0);
1090 /* make_valid - unsets the "invalid-check-digit-on-input" flag
1092 PG_FUNCTION_INFO_V1(make_valid);
1094 make_valid(PG_FUNCTION_ARGS)
1096 ean13 val = PG_GETARG_EAN13(0);
1098 val &= ~((ean13) 1);
1099 PG_RETURN_EAN13(val);
1102 /* this function temporarily sets weak input flag
1103 * (to loosen the strictness of check digit acceptance)
1104 * It's a helper function, not intended to be used!!
1106 PG_FUNCTION_INFO_V1(accept_weak_input);
1108 accept_weak_input(PG_FUNCTION_ARGS)
1110 #ifdef ISN_WEAK_MODE
1111 g_weak = PG_GETARG_BOOL(0);
1113 /* function has no effect */
1114 #endif /* ISN_WEAK_MODE */
1115 PG_RETURN_BOOL(g_weak);
1118 PG_FUNCTION_INFO_V1(weak_input_status);
1120 weak_input_status(PG_FUNCTION_ARGS)
1122 PG_RETURN_BOOL(g_weak);