- Removed out-of-date comments and one enum constant from html_tables.h and its generator.

author Gustavo André dos Santos Lopes <cataphract@php.net>

Sun, 24 Oct 2010 19:05:21 +0000 (19:05 +0000)

committer Gustavo André dos Santos Lopes <cataphract@php.net>

Sun, 24 Oct 2010 19:05:21 +0000 (19:05 +0000)
author Gustavo André dos Santos Lopes <cataphract@php.net>
Sun, 24 Oct 2010 19:05:21 +0000 (19:05 +0000)
committer Gustavo André dos Santos Lopes <cataphract@php.net>
Sun, 24 Oct 2010 19:05:21 +0000 (19:05 +0000)
diff --git a/ext/standard/html_tables.h b/ext/standard/html_tables.h

index 8d4de82c5ae19fb440518e99ac964a19e06bbf9b..f9674a1c37b364f42b4d68471b8ae84110b15dfe 100644 (file)
--- a/ext/standard/html_tables.h
+++ b/ext/standard/html_tables.h
@@ -1,4 +1,4 @@
-/*
+/*
     +----------------------------------------------------------------------+
     | PHP Version 5                                                        |
     +----------------------------------------------------------------------+
@@ -28,17 +28,9 @@
  ***************************************************************************
  **************************************************************************/
  
-/* cs_terminator is overloaded in the following fashion:
- * - It terminates the list entity maps.
- * - In BG(inverse_ent_maps), it's the key of the inverse map that stores
- *   only the basic entities.
- * - When passed to traverse_for_entities (or via php_unescape_entities with !all),
- *   we don't care about the encoding (UTF-8 is chosen, but it should be used
- *   when it doesn't matter).
- */
-enum entity_charset { cs_terminator, cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15,
-                                         cs_cp1251, cs_8859_5, cs_cp866, cs_macroman, cs_koi8r,
-                                         cs_big5, cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
+enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
+                                         cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, cs_big5,
+                                         cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
                                           cs_numelems /* used to count the number of charsets */
                                         };
  #define CHARSET_UNICODE_COMPAT(cs)     ((cs) <= cs_utf_8)
@@ -49,36 +41,36 @@ static const struct {
         const char *codeset;
         enum entity_charset charset;
  } charset_map[] = {
-       { "ISO-8859-1",         cs_8859_1 },
-       { "ISO8859-1",          cs_8859_1 },
-       { "ISO-8859-15",        cs_8859_15 },
-       { "ISO8859-15",         cs_8859_15 },
-       { "utf-8",                      cs_utf_8 },
+       { "ISO-8859-1",         cs_8859_1 },
+       { "ISO8859-1",          cs_8859_1 },
+       { "ISO-8859-15",        cs_8859_15 },
+       { "ISO8859-15",         cs_8859_15 },
+       { "utf-8",                      cs_utf_8 },
         { "cp1252",             cs_cp1252 },
-       { "Windows-1252",       cs_cp1252 },
-       { "1252",           cs_cp1252 }, 
+       { "Windows-1252",       cs_cp1252 },
+       { "1252",                       cs_cp1252 }, 
         { "BIG5",                       cs_big5 },
-       { "950",            cs_big5 },
+       { "950",                        cs_big5 },
         { "GB2312",                     cs_gb2312 },
-       { "936",            cs_gb2312 },
+       { "936",                        cs_gb2312 },
         { "BIG5-HKSCS",         cs_big5hkscs },
         { "Shift_JIS",          cs_sjis },
-       { "SJIS",               cs_sjis },
-       { "932",            cs_sjis },
-       { "EUCJP",              cs_eucjp },
-       { "EUC-JP",             cs_eucjp },
-       { "KOI8-R",         cs_koi8r },
-       { "koi8-ru",        cs_koi8r },
-       { "koi8r",          cs_koi8r },
-       { "cp1251",         cs_cp1251 },
-       { "Windows-1251",   cs_cp1251 },
-       { "win-1251",       cs_cp1251 },
-       { "iso8859-5",      cs_8859_5 },
-       { "iso-8859-5",     cs_8859_5 },
-       { "cp866",          cs_cp866 },
-       { "866",            cs_cp866 },    
-       { "ibm866",         cs_cp866 },
-       { "MacRoman",       cs_macroman },
+       { "SJIS",                       cs_sjis },
+       { "932",                        cs_sjis },
+       { "EUCJP",                      cs_eucjp },
+       { "EUC-JP",                     cs_eucjp },
+       { "KOI8-R",                     cs_koi8r },
+       { "koi8-ru",            cs_koi8r },
+       { "koi8r",                      cs_koi8r },
+       { "cp1251",                     cs_cp1251 },
+       { "Windows-1251",       cs_cp1251 },
+       { "win-1251",           cs_cp1251 },
+       { "iso8859-5",          cs_8859_5 },
+       { "iso-8859-5",         cs_8859_5 },
+       { "cp866",                      cs_cp866 },
+       { "866",                        cs_cp866 },    
+       { "ibm866",                     cs_cp866 },
+       { "MacRoman",           cs_macroman },
         { NULL }
  };
  
@@ -475,7 +467,6 @@ static const enc_to_uni enc_to_uni_macroman = {
  
  /* {{{ Index of tables for encoding conversion */
  static const enc_to_uni *const enc_to_uni_index[cs_numelems] = {
-       NULL,
         NULL,
         &enc_to_uni_iso88591,
         &enc_to_uni_win1252,
@@ -1144,7 +1135,7 @@ typedef struct {
         const entity_stage3_row *table;
  } entity_table_opt;
  
-/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistentcy's sake. */
+/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. */
  
  /* {{{ Start of HTML5 multi-stage table for codepoint -> entity */
  
diff --git a/ext/standard/html_tables/html_table_gen.php b/ext/standard/html_tables/html_table_gen.php

index 35be2d9afcb9b0094f396f7d8005f6fe64f1150c..f095202bc6ac6cfb6e796db8f4b88e5b1ee50883 100644 (file)
--- a/ext/standard/html_tables/html_table_gen.php
+++ b/ext/standard/html_tables/html_table_gen.php
@@ -51,17 +51,9 @@ $t = <<<CODE
  ***************************************************************************
  **************************************************************************/
  
-/* cs_terminator is overloaded in the following fashion:
- * - It terminates the list entity maps.
- * - In BG(inverse_ent_maps), it's the key of the inverse map that stores
- *   only the basic entities.
- * - When passed to traverse_for_entities (or via php_unescape_entities with !all),
- *   we don't care about the encoding (UTF-8 is chosen, but it should be used
- *   when it doesn't matter).
- */
-enum entity_charset { cs_terminator, cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15,
-                                         cs_cp1251, cs_8859_5, cs_cp866, cs_macroman, cs_koi8r,
-                                         cs_big5, cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
+enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
+                                         cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, cs_big5,
+                                         cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
                                           cs_numelems /* used to count the number of charsets */
                                         };
  #define CHARSET_UNICODE_COMPAT(cs)     ((cs) <= cs_utf_8)
@@ -72,36 +64,36 @@ static const struct {
         const char *codeset;
         enum entity_charset charset;
  } charset_map[] = {
-       { "ISO-8859-1",         cs_8859_1 },
-       { "ISO8859-1",          cs_8859_1 },
-       { "ISO-8859-15",        cs_8859_15 },
-       { "ISO8859-15",         cs_8859_15 },
-       { "utf-8",                      cs_utf_8 },
+       { "ISO-8859-1",         cs_8859_1 },
+       { "ISO8859-1",          cs_8859_1 },
+       { "ISO-8859-15",        cs_8859_15 },
+       { "ISO8859-15",         cs_8859_15 },
+       { "utf-8",                      cs_utf_8 },
         { "cp1252",             cs_cp1252 },
-       { "Windows-1252",       cs_cp1252 },
-       { "1252",           cs_cp1252 }, 
+       { "Windows-1252",       cs_cp1252 },
+       { "1252",                       cs_cp1252 }, 
         { "BIG5",                       cs_big5 },
-       { "950",            cs_big5 },
+       { "950",                        cs_big5 },
         { "GB2312",                     cs_gb2312 },
-       { "936",            cs_gb2312 },
+       { "936",                        cs_gb2312 },
         { "BIG5-HKSCS",         cs_big5hkscs },
         { "Shift_JIS",          cs_sjis },
-       { "SJIS",               cs_sjis },
-       { "932",            cs_sjis },
-       { "EUCJP",              cs_eucjp },
-       { "EUC-JP",             cs_eucjp },
-       { "KOI8-R",         cs_koi8r },
-       { "koi8-ru",        cs_koi8r },
-       { "koi8r",          cs_koi8r },
-       { "cp1251",         cs_cp1251 },
-       { "Windows-1251",   cs_cp1251 },
-       { "win-1251",       cs_cp1251 },
-       { "iso8859-5",      cs_8859_5 },
-       { "iso-8859-5",     cs_8859_5 },
-       { "cp866",          cs_cp866 },
-       { "866",            cs_cp866 },    
-       { "ibm866",         cs_cp866 },
-       { "MacRoman",       cs_macroman },
+       { "SJIS",                       cs_sjis },
+       { "932",                        cs_sjis },
+       { "EUCJP",                      cs_eucjp },
+       { "EUC-JP",                     cs_eucjp },
+       { "KOI8-R",                     cs_koi8r },
+       { "koi8-ru",            cs_koi8r },
+       { "koi8r",                      cs_koi8r },
+       { "cp1251",                     cs_cp1251 },
+       { "Windows-1251",       cs_cp1251 },
+       { "win-1251",           cs_cp1251 },
+       { "iso8859-5",          cs_8859_5 },
+       { "iso-8859-5",         cs_8859_5 },
+       { "cp866",                      cs_cp866 },
+       { "866",                        cs_cp866 },    
+       { "ibm866",                     cs_cp866 },
+       { "MacRoman",           cs_macroman },
         { NULL }
  };
  
@@ -132,51 +124,51 @@ echo $t;
  $encodings = array(
      array(
          "ident" => "iso88591",
-        "enumid" => 2,
+        "enumid" => 1,
          "name" => "ISO-8859-1",
          "file" => "mappings/8859-1.TXT",
      ),
      array(
          "ident" => "iso88595",
-        "enumid" => 6,
+        "enumid" => 5,
          "name" => "ISO-8859-5",
          "file" => "mappings/8859-5.TXT",
      ),
      array(
          "ident" => "iso885915",
-        "enumid" => 4,
+        "enumid" => 3,
          "name" => "ISO-8859-15",
          "file" => "mappings/8859-15.TXT",
      ),
      array(
          "ident" => "win1252",
-        "enumid" => 3,
+        "enumid" => 2,
          "enumident" => "cp1252",
          "name" => "Windows-1252",
          "file" => "mappings/CP1252.TXT",
      ),
      array(
          "ident" => "win1251",
-        "enumid" => 5,
+        "enumid" => 4,
          "enumident" => "cp1252",
          "name" => "Windows-1251",
          "file" => "mappings/CP1251.TXT",
      ),
      array(
          "ident" => "koi8r",
-        "enumid" => 9,
+        "enumid" => 8,
          "name" => "KOI8-R",
          "file" => "mappings/KOI8-R.TXT",
      ),
      array(
          "ident" => "cp866",
-        "enumid" => 7,
+        "enumid" => 6,
          "name" => "CP-866",
          "file" => "mappings/CP866.TXT",
      ),
      array(
          "ident" => "macroman",
-        "enumid" => 8,
+        "enumid" => 7,
          "name" => "MacRoman",
          "file" => "mappings/ROMAN.TXT",
      ),
@@ -336,7 +328,7 @@ foreach ($encodings as $e) {
      $lines = explode("\n", file_get_contents($e{'file'}));
      foreach ($lines as $l) {
          if (preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})\s+#\s*(.*)$/i", $l, $matches))
-            $map[] = array($matches[1], $matches[2], $matches[3]);
+            $map[] = array($matches[1], $matches[2], rtrim($matches[3]));
      }
      
      $mappy = array();
@@ -420,7 +412,7 @@ typedef struct {
         const entity_stage3_row *table;
  } entity_table_opt;
  
-/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistentcy's sake. */
+/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. */
  
  
  CODE;
author	Gustavo André dos Santos Lopes <cataphract@php.net>
	Sun, 24 Oct 2010 19:05:21 +0000 (19:05 +0000)
committer	Gustavo André dos Santos Lopes <cataphract@php.net>
	Sun, 24 Oct 2010 19:05:21 +0000 (19:05 +0000)
ext/standard/html_tables.h		patch \| blob \| history
ext/standard/html_tables/html_table_gen.php		patch \| blob \| history