Fix #80268: loadHTML() truncates at NUL bytes

author Christoph M. Becker <cmbecker69@gmx.de>

Fri, 23 Oct 2020 09:06:30 +0000 (11:06 +0200)

committer Christoph M. Becker <cmbecker69@gmx.de>

Mon, 26 Oct 2020 12:08:05 +0000 (13:08 +0100)
author Christoph M. Becker <cmbecker69@gmx.de>
Fri, 23 Oct 2020 09:06:30 +0000 (11:06 +0200)
committer Christoph M. Becker <cmbecker69@gmx.de>
Mon, 26 Oct 2020 12:08:05 +0000 (13:08 +0100)
diff --git a/NEWS b/NEWS

index 8a1ea004e049c9936e0acdaa7c316fc8a1983735..6d74ead2b9801ce2f39fd29ef64207d29a01ab8b 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,9 @@ PHP                                                                        NEWS
  - COM:
    . Fixed bug #62474 (com_event_sink crashes on certain arguments). (cmb)
  
+- DOM:
+  . Fixed bug #80268 (loadHTML() truncates at NUL bytes). (cmb)
+
  - IMAP:
    . Fixed bug #64076 (imap_sort() does not return FALSE on failure). (cmb)
    . Fixed bug #76618 (segfault on imap_reopen). (girgias)
diff --git a/ext/dom/document.c b/ext/dom/document.c

index 22bb90d5d88db3620a553d17e688739253ebb371..0e15e7a1106522464a5979eae4f7965ed86031b2 100644 (file)
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -2024,7 +2024,6 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
                 }
                 ctxt = htmlCreateFileParserCtxt(source, NULL);
         } else {
-               source_len = xmlStrlen((xmlChar *) source);
                 if (ZEND_SIZE_T_INT_OVFL(source_len)) {
                         php_error_docref(NULL, E_WARNING, "Input string is too long");
                         RETURN_FALSE;
diff --git a/ext/dom/tests/bug80268.phpt b/ext/dom/tests/bug80268.phpt

new file mode 100644 (file)

index 0000000..0fe50b8
--- /dev/null
+++ b/ext/dom/tests/bug80268.phpt
@@ -0,0 +1,24 @@
+--TEST--
+Bug #80268 (loadHTML() truncates at NUL bytes)
+--SKIPIF--
+<?php require_once('skipif.inc'); ?>
+--FILE--
+<?php
+$doc = new DOMDocument;
+$doc->loadHTML("<p>foo\0bar</p>");
+$html = $doc->saveHTML();
+var_dump(strpos($html, '<p>foo</p>') !== false);
+
+file_put_contents(__DIR__ . '/80268.html', "<p>foo\0bar</p>");
+$doc = new DOMDocument;
+$doc->loadHTMLFile(__DIR__ . '/80268.html');
+$html = $doc->saveHTML();
+var_dump(strpos($html, '<p>foo</p>') !== false);
+?>
+--CLEAN--
+<?php
+unlink(__DIR__ . '/80268.html');
+?>
+--EXPECT--
+bool(true)
+bool(true)
author	Christoph M. Becker <cmbecker69@gmx.de>
	Fri, 23 Oct 2020 09:06:30 +0000 (11:06 +0200)
committer	Christoph M. Becker <cmbecker69@gmx.de>
	Mon, 26 Oct 2020 12:08:05 +0000 (13:08 +0100)
NEWS		patch | blob | history
ext/dom/document.c		patch | blob | history
ext/dom/tests/bug80268.phpt	[new file with mode: 0644]	patch | blob