granicus.if.org Git - libexpat/commitdiff
xmltok.c: Avoid copying of partial characters for UTF-8 input (#115)
author: Sebastian Pipping <sebastian@pipping.org>
Thu, 10 Aug 2017 19:49:48 +0000 (21:49 +0200)
committer: Sebastian Pipping <sebastian@pipping.org>
Fri, 11 Aug 2017 11:07:52 +0000 (13:07 +0200)
Also make use of memcpy
Second take

expat/Changes
expat/lib/xmltok.c

index 54b08e5e01936f6793de1810b303c4aec9850ee8..9d544c84cb487f45aaf39ee40a203a4309ce562d 100644 (file)
@@ -3,6 +3,9 @@ NOTE: We are looking for help with a few things:
       If you can help, please get in touch.  Thanks!
 
 Release 2.?.? ?????????????????
+        Bug fixes:
+            #115  Fix potential copying of partial characters for UTF-8 input
+
         Other changes:
             #109  Fix "make check" for non-x86 architectures that default
                     to unsigned type char (-128..127 rather than 0..255)
@@ -19,6 +22,7 @@ Release 2.?.? ?????????????????
             Jakub Wilk
             Joe Orton
             Rolf Eike Beer
+            Lin Tian
 
 Release 2.2.3 Wed August 2 2017
         Security fixes:
index db4a5c8ca3efbbdbb814c52c810e43b07c7bde04..4b633f97da8b1a26ee2b26ef3a1fd982e6f3465a 100644 (file)
@@ -3,6 +3,8 @@
 */
 
 #include <stddef.h>
+#include <stdbool.h>
+#include <string.h>  // memcpy
 
 #ifdef _WIN32
 #include "winconfig.h"
@@ -363,22 +365,33 @@ utf8_toUtf8(const ENCODING *UNUSED_P(enc),
             const char **fromP, const char *fromLim,
             char **toP, const char *toLim)
 {
-  char *to;
-  const char *from;
-  const char *fromLimInitial = fromLim;
+  bool input_incomplete = false;
+  bool output_exhausted = false;
+
+  /* Avoid copying partial characters (due to limited space). */
+  const ptrdiff_t bytesAvailable = fromLim - *fromP;
+  const ptrdiff_t bytesStorable = toLim - *toP;
+  if (bytesAvailable > bytesStorable) {
+    fromLim = *fromP + bytesStorable;
+    output_exhausted = true;
+  }
 
-  /* Avoid copying partial characters. */
+  /* Avoid copying partial characters (from incomplete input). */
+  const char * const fromLimBefore = fromLim;
   align_limit_to_full_utf8_characters(*fromP, &fromLim);
+  if (fromLim < fromLimBefore) {
+    input_incomplete = true;
+  }
 
-  for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
-    *to = *from;
-  *fromP = from;
-  *toP = to;
+  const ptrdiff_t bytesToCopy = fromLim - *fromP;
+  memcpy((void *)*toP, (const void *)*fromP, (size_t)bytesToCopy);
+  *fromP += bytesToCopy;
+  *toP += bytesToCopy;
 
-  if (fromLim < fromLimInitial)
-    return XML_CONVERT_INPUT_INCOMPLETE;
-  else if ((to == toLim) && (from < fromLim))
+  if (output_exhausted)  // needs to go first
     return XML_CONVERT_OUTPUT_EXHAUSTED;
+  else if (input_incomplete)
+    return XML_CONVERT_INPUT_INCOMPLETE;
   else
     return XML_CONVERT_COMPLETED;
 }