strcat(connstring, conn->database);
}
+ if ( ! getenv("PGCLIENTENCODING") )
+ {
+ strcat(connstring, " client_encoding=UTF8");
+ }
+
return connstring;
}
else
state->shp_file = state->table;
- /* Create the dbf file */
- state->dbf = DBFCreate(state->shp_file);
+	/*
+	* Create the dbf file.
+	* If there's a user-specified encoding hanging around (PGCLIENTENCODING),
+	* try and use that. Otherwise, just use UTF-8 encoding, since that's
+	* usually our client encoding.
+	*/
+	{
+		const char *client_encoding = getenv("PGCLIENTENCODING");
+
+		if ( client_encoding )
+		{
+			/*
+			* encoding2codepage() hands back a heap-allocated string that we
+			* own. NOTE(review): freeing it here relies on DBFCreateEx keeping
+			* its own copy of the code page string, as shapelib's does.
+			*/
+			char *codepage = encoding2codepage(client_encoding);
+			state->dbf = DBFCreateEx(state->shp_file, codepage);
+			free(codepage);
+		}
+		else
+		{
+			state->dbf = DBFCreateEx(state->shp_file, "UTF-8");
+		}
+	}
+
if (!state->dbf)
{
snprintf(state->message, SHPDUMPERMSGLEN, _("Could not create dbf file %s"), state->shp_file);
#include "../liblwgeom/lwgeom_log.h" /* for LWDEBUG macros */
-typedef struct
-{
- int ldid;
- int cpg;
- char *desc;
- char *iconv;
- char *pg;
-} code_page_entry;
-
-static int num_code_pages = 60;
-
-static code_page_entry code_pages[] = {
- {0x01, 437, "U.S. MS-DOS", "CP437",""},
- {0x02, 850, "International MS-DOS", "CP850",""},
- {0x03, 1252, "Window ANSI", "WINDOWS-1252","WIN1252"},
- {0x08, 865, "Danish OEM", "CP865",""},
- {0x09, 437, "Dutch OEM", "CP437",""},
- {0x0A, 850, "Dutch OEM*", "CP850",""},
- {0x0B, 437, "Finnish OEM", "CP437",""},
- {0x0D, 437, "French OEM", "CP437",""},
- {0x0E, 850, "French OEM*", "CP850",""},
- {0x0F, 437, "German OEM", "CP437",""},
- {0x10, 850, "German OEM*", "CP850",""},
- {0x11, 437, "Italian OEM", "CP437",""},
- {0x12, 850, "Italian OEM*", "CP850",""},
- {0x13, 932, "Japanese Shift-JIS", "CP932","SJIS"},
- {0x14, 850, "Spanish OEM*", "CP850",""},
- {0x15, 437, "Swedish OEM", "CP437",""},
- {0x16, 850, "Swedish OEM*", "CP850",""},
- {0x17, 865, "Norwegian OEM", "CP865",""},
- {0x18, 437, "Spanish OEM", "CP865",""},
- {0x19, 437, "English OEM (Britain)", "CP437",""},
- {0x1A, 850, "English OEM (Britain)*", "CP850",""},
- {0x1B, 437, "English OEM (U.S.)", "CP437",""},
- {0x1C, 863, "French OEM (Canada)", "CP863",""},
- {0x1D, 850, "French OEM*", "CP850",""},
- {0x1F, 852, "Czech OEM", "CP852",""},
- {0x22, 852, "Hungarian OEM", "CP852",""},
- {0x23, 852, "Polish OEM", "CP852",""},
- {0x24, 860, "Portugese OEM", "CP860",""},
- {0x25, 850, "Potugese OEM*", "CP850",""},
- {0x26, 866, "Russian OEM", "WINDOWS-866","WIN866"},
- {0x37, 850, "English OEM (U.S.)*", "CP850",""},
- {0x40, 852, "Romanian OEM", "CP852",""},
- {0x4D, 936, "Chinese GBK (PRC)", "CP936",""},
- {0x4E, 949, "Korean (ANSI/OEM)", "CP949",""},
- {0x4F, 950, "Chinese Big 5 (Taiwan)", "CP950","BIG5"},
- {0x50, 874, "Thai (ANSI/OEM)", "WIN874",""},
- {0x57, 1252, "ANSI", "WINDOWS-1252",""},
- {0x58, 1252, "Western European ANSI", "WINDOWS-1252",""},
- {0x59, 1252, "Spanish ANSI", "WINDOWS-1252",""},
- {0x64, 852, "Eastern European MS-DOS", "CP852",""},
- {0x65, 866, "Russian MS-DOS", "CP866",""},
- {0x66, 865, "Nordic MS-DOS", "CP865",""},
- {0x67, 861, "Icelandic MS-DOS", "",""},
- {0x6A, 737, "Greek MS-DOS (437G)", "CP737",""},
- {0x6B, 857, "Turkish MS-DOS", "CP857",""},
- {0x6C, 863, "French-Canadian MS-DOS", "CP863",""},
- {0x78, 950, "Taiwan Big 5", "CP950",""},
- {0x79, 949, "Hangul (Wansung)", "CP949",""},
- {0x7A, 936, "PRC GBK", "CP936","GBK"},
- {0x7B, 932, "Japanese Shift-JIS", "CP932",""},
- {0x7C, 874, "Thai Windows/MS-DOS", "WINDOWS-874","WIN874"},
- {0x86, 737, "Greek OEM", "CP737",""},
- {0x87, 852, "Slovenian OEM", "CP852",""},
- {0x88, 857, "Turkish OEM", "CP857",""},
- {0xC8, 1250, "Eastern European Windows", "WINDOWS-1250","WIN1250"},
- {0xC9, 1251, "Russian Windows", "WINDOWS-1251","WIN1251"},
- {0xCA, 1254, "Turkish Windows", "WINDOWS-1254","WIN1254"},
- {0xCB, 1253, "Greek Windows", "WINDOWS-1253","WIN1253"},
- {0xCC, 1257, "Baltic Window", "WINDOWS-1257","WIN1257"},
- {0xFF, 65001, "UTF-8", "UTF-8","UTF8"}
-};
-
/* Internal ring/point structures */
typedef struct struct_point
void ReleasePolygons(Ring **polys, int npolys);
int GeneratePolygonGeometry(SHPLOADERSTATE *state, SHPObject *obj, char **geometry);
-/*
-* Code page info will come out of dbfopen as either a bare codepage number
-* (e.g. 1256) or as "LDID/1234" from the DBF hreader.
-*/
-static char *
-codepage2encoding(const char *cpg)
-{
- int cpglen;
- int is_ldid = 0;
- int num, i;
-
- /* Do nothing on nothing. */
- if ( ! cpg ) return NULL;
-
- /* Is this an LDID string? */
- /* If so, note it and move past the "LDID/" tag */
- cpglen = strlen(cpg);
- if ( strstr(cpg, "LDID/") )
- {
- if ( cpglen > 5 )
- {
- cpg += 5;
- is_ldid = 1;
- }
- else
- {
- return NULL;
- }
- }
-
- /* Read the number */
- num = atoi(cpg);
-
- /* Can we find this number in our lookup table? */
- for ( i = is_ldid ; i < num_code_pages; i++ )
- {
- if ( is_ldid )
- {
- if ( code_pages[i].ldid == num )
- return strdup(code_pages[i].iconv);
- }
- else
- {
- if ( code_pages[i].cpg == num )
- return strdup(code_pages[i].iconv);
- }
- }
-
- /* Didn't find a matching entry */
- return NULL;
-
-}
/* Return allocated string containing UTF8 string converted from encoding fromcode */
static int
return 1;
}
+/*
+* Code page info will come out of dbfopen as either a bare codepage number
+* (e.g. 1256) or as "LDID/1234" from the DBF header. We want to look up
+* the equivalent iconv encoding string so we can use iconv to transcode
+* the data into UTF8.
+*
+* cpg: code page string, may be NULL.
+*
+* Returns a newly allocated copy of the matching iconv encoding name (the
+* caller must free it), or NULL when cpg is NULL, carries no number, or has
+* no entry in the code_pages table.
+*/
+char *
+codepage2encoding(const char *cpg)
+{
+	static const char ldid_tag[] = "LDID/";
+	const size_t ldid_tag_len = sizeof(ldid_tag) - 1;
+	int is_ldid = 0;
+	long num;
+	char *end = NULL;
+	int i;
+
+	/* Do nothing on nothing. */
+	if ( ! cpg ) return NULL;
+
+	/* Is this an LDID string? Only a leading tag counts, because we are */
+	/* about to step over exactly that many characters. */
+	if ( strncmp(cpg, ldid_tag, ldid_tag_len) == 0 )
+	{
+		cpg += ldid_tag_len;
+		is_ldid = 1;
+	}
+
+	/* Read the number; no digits at all (e.g. a bare "LDID/") is not a code page */
+	num = strtol(cpg, &end, 10);
+	if ( end == cpg ) return NULL;
+
+	/* Can we find this number in our lookup table? */
+	/* Always scan from the first row: the very first entry is LDID 0x01. */
+	for ( i = 0; i < num_code_pages; i++ )
+	{
+		int key = is_ldid ? code_pages[i].ldid : code_pages[i].cpg;
+
+		if ( key == num )
+			return strdup(code_pages[i].iconv);
+	}
+
+	/* Didn't find a matching entry */
+	return NULL;
+}
+
+/*
+* In the case where data is coming out of the database in some weird encoding
+* we want to look up the appropriate code page entry to feed to DBFCreateEx.
+*
+* encoding: encoding name as found in PGCLIENTENCODING, compared
+* case-insensitively against the pg column of code_pages. May be NULL.
+*
+* Returns a newly allocated string the caller must free: "LDID/<n>" for a
+* matching non-UTF8 row, otherwise "UTF-8" (also used as the fallback when
+* nothing matches). The result is NULL only if the allocation itself fails.
+*/
+char *
+encoding2codepage(const char *encoding)
+{
+	int i;
+
+	/* A NULL name cannot be compared, and an empty one would "match" the */
+	/* first of the many rows whose pg column is "", so both fall through. */
+	if ( encoding && *encoding )
+	{
+		for ( i = 0; i < num_code_pages; i++ )
+		{
+			/* Large enough for "LDID/" plus any int */
+			char codepage[32];
+
+			if ( strcasecmp(encoding, code_pages[i].pg) != 0 )
+				continue;
+
+			/* UTF-8 is passed by name rather than by language driver id */
+			if ( code_pages[i].ldid == 0xFF )
+				break;
+
+			snprintf(codepage, sizeof(codepage), "LDID/%d", code_pages[i].ldid);
+			return strdup(codepage);
+		}
+	}
+
+	/* OK, we give up, pretend it's UTF8 */
+	return strdup("UTF-8");
+}
#define _(String) String
#endif
+
+
+/* One row of the code page lookup table used by codepage2encoding() and encoding2codepage(). */
+typedef struct
+{
+ /* Language driver id; matched against the number in an "LDID/n" string */
+ int ldid;
+ /* Bare code page number */
+ int cpg;
+ /* Human-readable description of the code page */
+ char *desc;
+ /* iconv encoding name handed back by codepage2encoding() */
+ char *iconv;
+ /* Name compared (case-insensitively) against the client encoding by encoding2codepage(); "" in rows that have none */
+ char *pg;
+} code_page_entry;
+
+/*
+* Lookup table between DBF language driver ids (ldid), code page numbers
+* (cpg), iconv encoding names and the encoding names accepted in
+* PGCLIENTENCODING (pg). Rows whose pg column is "" have no such name.
+*
+* http://www.autopark.ru/ASBProgrammerGuide/DBFSTRUC.HTM
+* http://resources.arcgis.com/fr/content/kbase?fa=articleShow&d=21106
+*/
+static code_page_entry code_pages[] = {
+	{0x01, 437, "U.S. MS-DOS", "CP437",""},
+	{0x02, 850, "International MS-DOS", "CP850",""},
+	{0x03, 1252, "Window ANSI", "WINDOWS-1252","WIN1252"},
+	{0x08, 865, "Danish OEM", "CP865",""},
+	{0x09, 437, "Dutch OEM", "CP437",""},
+	{0x0A, 850, "Dutch OEM*", "CP850",""},
+	{0x0B, 437, "Finnish OEM", "CP437",""},
+	{0x0D, 437, "French OEM", "CP437",""},
+	{0x0E, 850, "French OEM*", "CP850",""},
+	{0x0F, 437, "German OEM", "CP437",""},
+	{0x10, 850, "German OEM*", "CP850",""},
+	{0x11, 437, "Italian OEM", "CP437",""},
+	{0x12, 850, "Italian OEM*", "CP850",""},
+	{0x13, 932, "Japanese Shift-JIS", "CP932","SJIS"},
+	{0x14, 850, "Spanish OEM*", "CP850",""},
+	{0x15, 437, "Swedish OEM", "CP437",""},
+	{0x16, 850, "Swedish OEM*", "CP850",""},
+	{0x17, 865, "Norwegian OEM", "CP865",""},
+	/* Code page 437, so the iconv name is CP437 (was CP865, copied from the row above) */
+	{0x18, 437, "Spanish OEM", "CP437",""},
+	{0x19, 437, "English OEM (Britain)", "CP437",""},
+	{0x1A, 850, "English OEM (Britain)*", "CP850",""},
+	{0x1B, 437, "English OEM (U.S.)", "CP437",""},
+	{0x1C, 863, "French OEM (Canada)", "CP863",""},
+	{0x1D, 850, "French OEM*", "CP850",""},
+	{0x1F, 852, "Czech OEM", "CP852",""},
+	{0x22, 852, "Hungarian OEM", "CP852",""},
+	{0x23, 852, "Polish OEM", "CP852",""},
+	{0x24, 860, "Portugese OEM", "CP860",""},
+	{0x25, 850, "Potugese OEM*", "CP850",""},
+	/* Same iconv name as the other code page 866 row (0x65) */
+	{0x26, 866, "Russian OEM", "CP866","WIN866"},
+	{0x37, 850, "English OEM (U.S.)*", "CP850",""},
+	{0x40, 852, "Romanian OEM", "CP852",""},
+	{0x4D, 936, "Chinese GBK (PRC)", "CP936",""},
+	{0x4E, 949, "Korean (ANSI/OEM)", "CP949",""},
+	{0x4F, 950, "Chinese Big 5 (Taiwan)", "CP950","BIG5"},
+	/* Same iconv name as the other code page 874 row (0x7C); WIN874 is the pg spelling */
+	{0x50, 874, "Thai (ANSI/OEM)", "WINDOWS-874",""},
+	{0x57, 1252, "ANSI", "WINDOWS-1252",""},
+	{0x58, 1252, "Western European ANSI", "WINDOWS-1252",""},
+	{0x59, 1252, "Spanish ANSI", "WINDOWS-1252",""},
+	{0x64, 852, "Eastern European MS-DOS", "CP852",""},
+	{0x65, 866, "Russian MS-DOS", "CP866",""},
+	{0x66, 865, "Nordic MS-DOS", "CP865",""},
+	/* An empty iconv name can never be opened; use the name for code page 861 */
+	{0x67, 861, "Icelandic MS-DOS", "CP861",""},
+	{0x6A, 737, "Greek MS-DOS (437G)", "CP737",""},
+	{0x6B, 857, "Turkish MS-DOS", "CP857",""},
+	{0x6C, 863, "French-Canadian MS-DOS", "CP863",""},
+	{0x78, 950, "Taiwan Big 5", "CP950",""},
+	{0x79, 949, "Hangul (Wansung)", "CP949",""},
+	{0x7A, 936, "PRC GBK", "CP936","GBK"},
+	{0x7B, 932, "Japanese Shift-JIS", "CP932",""},
+	{0x7C, 874, "Thai Windows/MS-DOS", "WINDOWS-874","WIN874"},
+	{0x86, 737, "Greek OEM", "CP737",""},
+	{0x87, 852, "Slovenian OEM", "CP852",""},
+	{0x88, 857, "Turkish OEM", "CP857",""},
+	{0xC8, 1250, "Eastern European Windows", "WINDOWS-1250","WIN1250"},
+	{0xC9, 1251, "Russian Windows", "WINDOWS-1251","WIN1251"},
+	{0xCA, 1254, "Turkish Windows", "WINDOWS-1254","WIN1254"},
+	{0xCB, 1253, "Greek Windows", "WINDOWS-1253","WIN1253"},
+	{0xCC, 1257, "Baltic Window", "WINDOWS-1257","WIN1257"},
+	{0xFF, 65001, "UTF-8", "UTF-8","UTF8"}
+};
+
+/* Number of rows in code_pages; derived from the table so the two cannot drift apart */
+static int num_code_pages = (int)(sizeof(code_pages) / sizeof(code_pages[0]));
+
+
+
typedef struct shp_connection_state
{
/* PgSQL username to log in with */
const char *colmap_pg_by_dbf(colmap *map, const char *dbfname);
+/*
+* Look up the iconv encoding name for a code page string, given either as a
+* bare code page number or as "LDID/<n>". Returns a strdup()'d string, or
+* NULL when cpg is NULL or nothing in the lookup table matches.
+*/
+char *codepage2encoding(const char *cpg);
+/*
+* Look up the code page string to feed to DBFCreateEx for an encoding name.
+* Returns an allocated string: "LDID/<n>" for a matching table row, or
+* "UTF-8" for the UTF8 row and when nothing matches.
+*/
+char *encoding2codepage(const char *encoding);
+
#endif