Port to Python 3.

author Martin v. Löwis <martin@v.loewis.de>

Fri, 4 Jun 2010 18:40:55 +0000 (18:40 +0000)

committer Martin v. Löwis <martin@v.loewis.de>

Fri, 4 Jun 2010 18:40:55 +0000 (18:40 +0000)
author Martin v. Löwis <martin@v.loewis.de>
Fri, 4 Jun 2010 18:40:55 +0000 (18:40 +0000)
committer Martin v. Löwis <martin@v.loewis.de>
Fri, 4 Jun 2010 18:40:55 +0000 (18:40 +0000)
diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py

index 6239f8bba4cd394be00b2e545412587d53bd4efc..b3e6bec777d7d5fde73e463566da6d222420184a 100755 (executable)
--- a/Tools/i18n/msgfmt.py
+++ b/Tools/i18n/msgfmt.py
@@ -30,6 +30,7 @@ import os
  import getopt
  import struct
  import array
+from email.parser import HeaderParser
  
  __version__ = "1.1"
  
@@ -59,13 +60,13 @@ def generate():
      # the keys are sorted in the .mo file
      keys = sorted(MESSAGES.keys())
      offsets = []
-    ids = strs = ''
+    ids = strs = b''
      for id in keys:
          # For each string, we need size and file offset.  Each string is NUL
          # terminated; the NUL does not count into the size.
          offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
-        ids += id + '\0'
-        strs += MESSAGES[id] + '\0'
+        ids += id + b'\0'
+        strs += MESSAGES[id] + b'\0'
      output = ''
      # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
      # the keys start right after the index tables.
@@ -108,7 +109,7 @@ def make(filename, outfile):
          outfile = os.path.splitext(infile)[0] + '.mo'
  
      try:
-        lines = open(infile).readlines()
+        lines = open(infile, 'rb').readlines()
      except IOError as msg:
          print(msg, file=sys.stderr)
          sys.exit(1)
@@ -116,9 +117,14 @@ def make(filename, outfile):
      section = None
      fuzzy = 0
  
+    # Start off assuming Latin-1, so everything decodes without failure,
+    # until we know the exact encoding
+    encoding = 'latin-1'
+
      # Parse the catalog
      lno = 0
      for l in lines:
+        l = l.decode(encoding)
          lno += 1
          # If we get a comment line after a msgstr, this is a new entry
          if l[0] == '#' and section == STR:
@@ -135,9 +141,15 @@ def make(filename, outfile):
          if l.startswith('msgid') and not l.startswith('msgid_plural'):
              if section == STR:
                  add(msgid, msgstr, fuzzy)
+                if not msgid:
+                    # See whether there is an encoding declaration
+                    p = HeaderParser()
+                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
+                    if charset:
+                        encoding = charset
              section = ID
              l = l[5:]
-            msgid = msgstr = ''
+            msgid = msgstr = b''
              is_plural = False
          # This is a message with plural forms
          elif l.startswith('msgid_plural'):
@@ -146,7 +158,7 @@ def make(filename, outfile):
                        file=sys.stderr)
                  sys.exit(1)
              l = l[12:]
-            msgid += '\0' # separator of singular and plural
+            msgid += b'\0' # separator of singular and plural
              is_plural = True
          # Now we are in a msgstr section
          elif l.startswith('msgstr'):
@@ -158,7 +170,7 @@ def make(filename, outfile):
                      sys.exit(1)
                  l = l.split(']', 1)[1]
                  if msgstr:
-                    msgstr += '\0' # Separator of the various plural forms
+                    msgstr += b'\0' # Separator of the various plural forms
              else:
                  if is_plural:
                      print(sys.stderr, 'indexed msgstr required for plural on  %s:%d' % (infile, lno),
@@ -172,9 +184,9 @@ def make(filename, outfile):
          # XXX: Does this always follow Python escape semantics?
          l = eval(l)
          if section == ID:
-            msgid += l
+            msgid += l.encode(encoding)
          elif section == STR:
-            msgstr += l
+            msgstr += l.encode(encoding)
          else:
              print('Syntax error on %s:%d' % (infile, lno), \
                    'before:', file=sys.stderr)
author	Martin v. Löwis <martin@v.loewis.de>
	Fri, 4 Jun 2010 18:40:55 +0000 (18:40 +0000)
committer	Martin v. Löwis <martin@v.loewis.de>
	Fri, 4 Jun 2010 18:40:55 +0000 (18:40 +0000)