utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
static int
is_valid_mbc_string(const UChar* s, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_UTF16_BE, s, end);
+ while (s < end) {
+ int len = utf16be_mbc_enc_len(s);
+ if (len == 4) {
+ if (s + 2 >= end)
+ return FALSE;
+ if (! UTF16_IS_SURROGATE_SECOND(*(s+2)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*s))
+ return FALSE;
+
+ s += len;
+ }
+
+ if (s != end)
+ return FALSE;
+ else
+ return TRUE;
}
static int
s--;
}
- if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1)
+ if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-2)))
s -= 2;
return (UChar* )s;
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
const UChar* end1 = end - 1;
while (p < end1) {
- p += utf16le_mbc_enc_len(p);
+ int len = utf16le_mbc_enc_len(p);
+ if (len == 4) {
+ if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))
+ return FALSE;
+
+ p += len;
}
if (p != end)
s--;
}
- if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
+ if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-1)))
s -= 2;
return (UChar* )s;