From c31cb1351c7989a85bbed76be4f9bb20b4119974 Mon Sep 17 00:00:00 2001 From: Paul Ramsey Date: Thu, 27 May 2010 13:19:12 +0000 Subject: [PATCH] Fix utf8 to return *something* when it can, so that something can be reported in the error string. git-svn-id: http://svn.osgeo.org/postgis/trunk@5646 b70326c6-7e19-0410-871a-916f4a2858ee --- loader/shp2pgsql-core.c | 112 +++++++++++++++++++++++++--------------- loader/shp2pgsql-core.h | 2 +- 2 files changed, 70 insertions(+), 44 deletions(-) diff --git a/loader/shp2pgsql-core.c b/loader/shp2pgsql-core.c index d169e3811..fbb107c10 100644 --- a/loader/shp2pgsql-core.c +++ b/loader/shp2pgsql-core.c @@ -40,7 +40,11 @@ void lwgeom_init_allocators() * Internal functions */ -char *utf8(const char *fromcode, char *inputbuf); +#define UTF8_GOOD_RESULT 0 +#define UTF8_BAD_RESULT 1 +#define UTF8_NO_RESULT 2 + +int utf8(const char *fromcode, char *inputbuf, char **outputbuf); char *escape_copy_string(char *str); char *escape_insert_string(char *str); @@ -53,50 +57,54 @@ int GeneratePolygonGeometry(SHPLOADERSTATE *state, SHPObject *obj, char **geomet /* Return allocated string containing UTF8 string converted from encoding fromcode */ -char * -utf8(const char *fromcode, char *inputbuf) +int utf8(const char *fromcode, char *inputbuf, char **outputbuf) { iconv_t cd; char *inbufptr = inputbuf; char *outputptr; - char *outputbuf; size_t outbytesleft; size_t inbytesleft; + int on = 1; inbytesleft = strlen(inputbuf); cd = iconv_open("UTF-8", fromcode); if ( cd == ((iconv_t)(-1)) ) - return NULL; + return UTF8_NO_RESULT; outbytesleft = inbytesleft * 3 + 1; /* UTF8 string can be 3 times larger */ /* then local string */ - outputbuf = (char *)malloc(outbytesleft); - if (!outputbuf) - return NULL; + *outputbuf = (char *)malloc(outbytesleft); + if (!*outputbuf) + return UTF8_NO_RESULT; - memset(outputbuf, 0, outbytesleft); - outputptr = outputbuf; + memset(*outputbuf, 0, outbytesleft); + outputptr = *outputbuf; - if (-1 == iconv(cd, &inbufptr, &inbytesleft, &outputptr, &outbytesleft)) + /* Does this string convert cleanly? */ + if ( iconv(cd, &inputbuf, &inbytesleft, &outputptr, &outbytesleft) == -1 ) { - switch (errno) - { - case EINVAL: - fprintf(stderr, "WARNING: Incomplete multibyte sequence in string '%s' discarded\n", inputbuf); - *outputptr = '\0'; - break; - case EILSEQ: - fprintf(stderr, "ERROR: Invalid multibyte sequence '%s' in string '%s'\n", inbufptr, inputbuf); - case E2BIG: /* This would be a programmatic error */ - default: - return NULL; - } - } - iconv_close (cd); - - return outputbuf; + /* No. Try to convert it while transliterating. */ + iconvctl(cd, ICONV_SET_TRANSLITERATE, &on); + if ( iconv(cd, &inputbuf, &inbytesleft, &outputptr, &outbytesleft) == -1 ) + { + /* No. Try to convert it while discarding errors. */ + iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &on); + if ( iconv(cd, &inputbuf, &inbytesleft, &outputptr, &outbytesleft) == -1 ) + { + /* Still no. Throw away the buffer and return. */ + free(*outputbuf); + iconv_close(cd); + return UTF8_NO_RESULT; + } + } + iconv_close(cd); + return UTF8_BAD_RESULT; + } + /* Return a good result, converted string is in buffer. */ + iconv_close(cd); + return UTF8_GOOD_RESULT; } /** @@ -1139,11 +1147,22 @@ ShpLoaderOpenShape(SHPLOADERSTATE *state) if (state->config->encoding) { - /* If we are converting from another encoding to UTF8, convert the field name to UTF8 */ - utf8str = utf8(state->config->encoding, name); - if (!utf8str) + static char *encoding_msg = "Try \"LATIN1\" (Western European), or one of the values described at http://www.postgresql.org/docs/current/static/multibyte.html."; + + int rv = utf8(state->config->encoding, name, &utf8str); + + if (rv != UTF8_GOOD_RESULT) { - snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field name \"%s\" from %s encoding to UTF-8: iconv reports \"%s\"", name, state->config->encoding, strerror(errno)); + if( rv == UTF8_BAD_RESULT ) + snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field name \"%s\" to UTF-8 (iconv reports \"%s\"). Current encoding is \"%s\". %s", utf8str, strerror(errno), state->config->encoding, encoding_msg); + else if( rv == UTF8_NO_RESULT ) + snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field name to UTF-8 (iconv reports \"%s\"). Current encoding is \"%s\". %s", strerror(errno), state->config->encoding, encoding_msg); + else + snprintf(state->message, SHPLOADERMSGLEN, "Unexpected return value from utf8()"); + + if( rv == UTF8_BAD_RESULT ) + free(utf8str); + return SHPLOADERERR; } @@ -1569,18 +1588,25 @@ ShpLoaderGenerateSQLRowStatement(SHPLOADERSTATE *state, int item, char **strreco if (state->config->encoding) { - /* If we are converting from another encoding to UTF8, convert the field value to UTF8 */ - utf8str = utf8(state->config->encoding, val); - if (!utf8str) - { - snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field value \"%s\" to UTF-8 (iconv reports \"%s\"). Current encoding is \"%s\", try \"LATIN1\" (Western European), or one of the values described at http://www.postgresql.org/docs/current/static/multibyte.html.", val, strerror(errno), state->config->encoding); - - return SHPLOADERERR; - } - - strncpy(val, utf8str, MAXVALUELEN); - free(utf8str); - } + static char *encoding_msg = "Try \"LATIN1\" (Western European), or one of the values described at http://www.postgresql.org/docs/current/static/multibyte.html."; + + int rv = utf8(state->config->encoding, val, &utf8str); + + if (rv != UTF8_GOOD_RESULT) + { + if( rv == UTF8_BAD_RESULT ) + snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert data value \"%s\" to UTF-8 (iconv reports \"%s\"). Current encoding is \"%s\". %s", utf8str, strerror(errno), state->config->encoding, encoding_msg); + else if( rv == UTF8_NO_RESULT ) + snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert data value to UTF-8 (iconv reports \"%s\"). Current encoding is \"%s\". %s", strerror(errno), state->config->encoding, encoding_msg); + else + snprintf(state->message, SHPLOADERMSGLEN, "Unexpected return value from utf8()"); + + if( rv == UTF8_BAD_RESULT ) + free(utf8str); + + return SHPLOADERERR; + } + } /* Escape attribute correctly according to dump format */ if (state->config->dump_format) diff --git a/loader/shp2pgsql-core.h b/loader/shp2pgsql-core.h index af8c07f84..869fac78e 100644 --- a/loader/shp2pgsql-core.h +++ b/loader/shp2pgsql-core.h @@ -68,7 +68,7 @@ /* * Default character encoding */ -#define ENCODING_DEFAULT "UTF8" +#define ENCODING_DEFAULT "UTF-8" /* * Structure to hold the loader configuration options -- 2.40.0