struct unknown_encoding {
struct normal_encoding normal;
- unsigned short (*convert)(void *userData, const char *p);
+ int (*convert)(void *userData, const char *p);
void *userData;
unsigned short utf16[256];
unsigned char utf8[256][4];
static
int unknown_isName(const ENCODING *enc, const char *p)
{
- unsigned short c = ((const struct unknown_encoding *)enc)
- ->convert(((const struct unknown_encoding *)enc)->userData, p);
+ int c = ((const struct unknown_encoding *)enc)
+ ->convert(((const struct unknown_encoding *)enc)->userData, p); /* convert() now returns int, so c can be negative or wider than 16 bits */
+ if (c & ~0xFFFF) /* true for negative values and for anything above 0xFFFF */
+ return 0; /* treated as "not a name character"; also keeps c >> 8 within 0..255 for the lookup below */
return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
}
static
int unknown_isNmstrt(const ENCODING *enc, const char *p)
{
- unsigned short c = ((const struct unknown_encoding *)enc)
- ->convert(((const struct unknown_encoding *)enc)->userData, p);
+ int c = ((const struct unknown_encoding *)enc)
+ ->convert(((const struct unknown_encoding *)enc)->userData, p); /* convert() now returns int, so c can be negative or wider than 16 bits */
+ if (c & ~0xFFFF) /* true for negative values and for anything above 0xFFFF */
+ return 0; /* treated as "not a name-start character"; also keeps c >> 8 within 0..255 for the lookup below */
return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
}
static
int unknown_isInvalid(const ENCODING *enc, const char *p)
{
- return ((const struct unknown_encoding *)enc)
- ->convert(((const struct unknown_encoding *)enc)->userData, p) == 0;
+ int c = ((const struct unknown_encoding *)enc)
+ ->convert(((const struct unknown_encoding *)enc)->userData, p); /* user-supplied converter; result is an int */
+ return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; /* replaces the old "== 0" failure test: invalid when outside 16 bits (incl. negative) or rejected by checkCharRefNumber() */
}
static
utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
n = *utf8++;
if (n == 0) {
- unsigned short c
- = ((const struct unknown_encoding *)enc)
- ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
+ int c = ((const struct unknown_encoding *)enc)
+ ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
n = XmlUtf8Encode(c, buf);
if (n > toLim - *toP)
break;
unsigned short c
= ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
if (c == 0) {
- c = ((const struct unknown_encoding *)enc)
+ c = (unsigned short)((const struct unknown_encoding *)enc)
->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
*fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2);
ENCODING *
XmlInitUnknownEncoding(void *mem,
- unsigned short *table,
- unsigned short (*convert)(void *userData, const char *p),
+ int *table,
+ int (*convert)(void *userData, const char *p),
void *userData)
{
int i;
&& table[i] != i)
return 0;
for (i = 0; i < 256; i++) {
- unsigned short c = table[i];
- if (c < 0x80) {
+ int c = table[i];
+ if (c == -1)
+ c = 0xFFFF;
+ if (c < 0) {
+ if (c < -4)
+ return 0;
+ e->normal.type[i] = BT_LEAD2 - (c + 2);
+ e->utf8[i][0] = 0;
+ e->utf16[i] = 0;
+ }
+ else if (c < 0x80) {
if (latin1_encoding.type[c] != BT_OTHER
&& latin1_encoding.type[c] != BT_NONXML
&& c != i)
- return 0;
- if (c >= 2 && c <= 4) {
- e->normal.type[i] = BT_LEAD2 + (c - 2);
- e->utf8[i][0] = 0;
- e->utf16[i] = 0;
- }
- else {
- e->normal.type[i] = latin1_encoding.type[c];
- e->utf8[i][0] = 1;
- e->utf8[i][1] = (char)c;
- e->utf16[i] = c == 0 ? 0xFFFF : c;
- }
+ return 0;
+ e->normal.type[i] = latin1_encoding.type[c];
+ e->utf8[i][0] = 1;
+ e->utf8[i][1] = (char)c;
+ e->utf16[i] = c == 0 ? 0xFFFF : c;
}
else if (checkCharRefNumber(c) < 0) {
e->normal.type[i] = BT_NONXML;
e->utf8[i][1] = 0;
}
else {
+ if (c > 0xFFFF)
+ return 0;
if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
e->normal.type[i] = BT_NMSTRT;
else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
#ifdef WIN32
#include <windows.h>
-int codepageMap(int cp, unsigned short *map)
+int codepageMap(int cp, int *map) /* map entries are int so that negative markers can be stored */
{
int i;
CPINFO info;
if (!GetCPInfo(cp, &info) || info.MaxCharSize > 2)
return 0;
for (i = 0; i < 256; i++)
- map[i] = 0;
+ map[i] = -1; /* -1: no mapping recorded yet for this byte (was 0 before) */
if (info.MaxCharSize > 1) {
for (i = 0; i < MAX_LEADBYTES; i++) {
int j, lim;
break;
lim = info.LeadByte[i + 1];
for (j = info.LeadByte[i]; j < lim; j++)
- map[j] = 2;
+ map[j] = -2; /* -2 is turned into BT_LEAD2 by XmlInitUnknownEncoding; NOTE(review): this loop steps i by 1 and stops before lim - confirm against the CPINFO LeadByte range semantics */
}
}
for (i = 0; i < 256; i++) {
- if (map[i] == 0) {
+ if (map[i] == -1) { /* only bytes that were not marked in the lead-byte pass */
char c = i;
+ unsigned short n; /* receives the single wide character; map[] is no longer written to directly */
if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
- &c, 1, map + i, 1) == 0)
- map[i] = 0;
+ &c, 1, &n, 1) == 1)
+ map[i] = n; /* when the call does not return 1, the entry keeps its -1 marker */
}
}
return 1;
}
-unsigned short codepageConvert(int cp, const char *p)
+int codepageConvert(int cp, const char *p) /* int return leaves room for a negative failure value */
{
unsigned short c;
if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
p, 2, &c, 1) == 1)
return c;
- return 0;
+ return -1; /* the call did not produce exactly one wide character: report failure as -1 instead of 0 */
}
#else /* not WIN32 */
unsigned short codepageConvert(int cp, const char *p)
{
- return 0;
+ return -1;
}
#endif /* not WIN32 */