Changes submitted by Peter Funk (some fixes/additions by B.Warsaw) to

author Barry Warsaw <barry@python.org>

Sat, 26 Feb 2000 20:56:47 +0000 (20:56 +0000)

committer Barry Warsaw <barry@python.org>

Sat, 26 Feb 2000 20:56:47 +0000 (20:56 +0000)
author Barry Warsaw <barry@python.org>
Sat, 26 Feb 2000 20:56:47 +0000 (20:56 +0000)
committer Barry Warsaw <barry@python.org>
Sat, 26 Feb 2000 20:56:47 +0000 (20:56 +0000)
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py

index fcd6b9512d398f432a537208d97b1f09454222e7..4ff4962d62b0024558bb8a7ef4146c81c2d4b9c0 100755 (executable)
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -1,5 +1,8 @@
  #! /usr/bin/env python
  # Originally written by Barry Warsaw <bwarsaw@python.org>
+#
+# minimally patched to make it even more xgettext compatible 
+# by Peter Funk <pf@artcom-gmbh.de>
  
  """pygettext -- Python equivalent of xgettext(1)
  
@@ -35,7 +38,8 @@ below for how to augment this.
   [2] http://www.gnu.org/software/gettext/gettext.html
  
  NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
-where ever possible.
+wherever possible.  However, some options are still missing or are not fully
+implemented.
  
  Usage: pygettext [options] filename ...
  
@@ -45,9 +49,17 @@ Options:
      --extract-all
          Extract all strings
  
-    -d default-domain
-    --default-domain=default-domain
-        Rename the default output file from messages.pot to default-domain.pot 
+    -d name
+    --default-domain=name
+        Rename the default output file from messages.pot to name.pot 
+
+    -E
+    --escape
+        replace non-ASCII characters with octal escape sequences.
+
+    -h
+    --help
+        print this help message and exit
  
      -k [word]
      --keyword[=word]
@@ -73,13 +85,31 @@ Options:
          If style is omitted, Gnu is used.  The style name is case
          insensitive.  By default, locations are included.
  
+    -o filename
+    --output=filename
+        Rename the default output file from messages.pot to filename.
+
+    -p dir
+    --output-dir=dir
+        Output files will be placed in directory dir.
+
      -v
      --verbose
          Print the names of the files being processed.
  
-    --help
-    -h
-        print this help message and exit
+    -V
+    --version
+        Print the version of pygettext and exit.
+
+    -w columns
+    --width=columns
+        Set width of output to columns.
+
+    -x filename
+    --exclude-file=filename
+        Specify a file that contains a list of strings that are not to be
+        extracted from the input files.  Each string to be excluded must
+        appear on a line by itself in the file.
  
  """
  
@@ -90,12 +120,16 @@ import time
  import getopt
  import tokenize
  
-__version__ = '0.2'
+__version__ = '1.0'
  
  
  \f
  # for selftesting
-def _(s): return s
+try:
+    import fintl
+    _ = fintl.gettext
+except ImportError:
+    def _(s): return s
  
  
  # The normal pot-file header. msgmerge and EMACS' po-mode work better if
@@ -125,21 +159,31 @@ def usage(code, msg=''):
          print msg
      sys.exit(code)
  
+
  \f
  escapes = []
-for i in range(256):
-    if i < 32 or i > 127:
-        escapes.append("\\%03o" % i)
-    else:
-        escapes.append(chr(i))
  
-escapes[ord('\\')] = '\\\\'
-escapes[ord('\t')] = '\\t'
-escapes[ord('\r')] = '\\r'
-escapes[ord('\n')] = '\\n'
-escapes[ord('\"')] = '\\"'
+def make_escapes(pass_iso8859):
+    global escapes
+    # Fill the escapes table.  If pass_iso8859 is true, only the low 7 bits
+    # decide printability, so iso-8859 characters pass through and 'msgid
+    # "Höhe"' does not become 'msgid "H\366he"'.  Otherwise we escape any
+    # character outside the 32..126 range.
+    mod = pass_iso8859 and 128 or 256
+    for i in range(256):
+        if 32 <= (i % mod) <= 126:
+            escapes.append(chr(i))
+        else:
+            escapes.append("\\%03o" % i)
+    escapes[ord('\\')] = '\\\\'
+    escapes[ord('\t')] = '\\t'
+    escapes[ord('\r')] = '\\r'
+    escapes[ord('\n')] = '\\n'
+    escapes[ord('\"')] = '\\"'
+
  
  def escape(s):
+    global escapes
      s = list(s)
      for i in range(len(s)):
          s[i] = escapes[ord(s[i])]
@@ -200,12 +244,13 @@ class TokenEater:
              # were no strings inside _(), then just ignore this entry.
              if self.__data:
                  msg = string.join(self.__data, '')
-                entry = (self.__curfile, self.__lineno)
-                linenos = self.__messages.get(msg)
-                if linenos is None:
-                    self.__messages[msg] = [entry]
-                else:
-                    linenos.append(entry)
+                if not msg in self.__options.toexclude:
+                    entry = (self.__curfile, self.__lineno)
+                    linenos = self.__messages.get(msg)
+                    if linenos is None:
+                        self.__messages[msg] = [entry]
+                    else:
+                        linenos.append(entry)
              self.__state = self.__waiting
          elif ttype == tokenize.STRING:
              self.__data.append(safe_eval(tstring))
@@ -222,20 +267,30 @@ class TokenEater:
              sys.stdout = fp
              # The time stamp in the header doesn't have the same format
              # as that generated by xgettext...
-            print pot_header % {'time': timestamp, 'version':__version__}
+            print pot_header % {'time': timestamp, 'version': __version__}
              for k, v in self.__messages.items():
-                for filename, lineno in v:
-                    # location comments are different b/w Solaris and GNU
-                    d = {'filename': filename,
-                         'lineno': lineno}
-                    if options.location == options.SOLARIS:
+                # location comments are different b/w Solaris and GNU:
+                if options.location == options.SOLARIS:
+                    for filename, lineno in v:
+                        d = {'filename': filename, 'lineno': lineno}
                          print _('# File: %(filename)s, line: %(lineno)d') % d
-                    elif options.location == options.GNU:
-                        print _('#: %(filename)s:%(lineno)d') % d
+                elif options.location == options.GNU:
+                    # fit as many locations on one line, as long as the
+                    # resulting line length doesn't exceed 'options.width'
+                    locline = '#:'
+                    for filename, lineno in v:
+                        d = {'filename': filename, 'lineno': lineno}
+                        s = _(' %(filename)s:%(lineno)d') % d
+                        if len(locline) + len(s) <= options.width:
+                            locline = locline + s
+                        else:
+                            print locline
+                            locline = "#:" + s
+                    if len(locline) > 2:
+                        print locline
                  # TBD: sorting, normalizing
                  print 'msgid', normalize(k)
-                print 'msgstr ""'
-                print
+                print 'msgstr ""\n'
          finally:
              sys.stdout = sys.__stdout__
  
@@ -245,9 +300,11 @@ def main():
      try:
          opts, args = getopt.getopt(
              sys.argv[1:],
-            'k:d:n:hv',
-            ['keyword', 'default-domain', 'help',
-             'add-location=', 'no-location', 'verbose'])
+            'ad:Ehk:n:o:p:Vvw:x:',
+            ['extract-all', 'default-domain', 'escape', 'help', 'keyword',
+             'add-location', 'no-location', 'output=', 'output-dir=',
+             'verbose', 'version', 'width=', 'exclude-file=',
+             ])
      except getopt.error, msg:
          usage(1, msg)
  
@@ -257,10 +314,15 @@ def main():
          GNU = 1
          SOLARIS = 2
          # defaults
+        extractall = 0 # FIXME: currently this option has no effect at all.
+        escape = 0
          keywords = []
+        outpath = ''
          outfile = 'messages.pot'
          location = GNU
          verbose = 0
+        width = 78
+        excludefilename = ''
  
      options = Options()
      locations = {'gnu' : options.GNU,
@@ -271,12 +333,16 @@ def main():
      for opt, arg in opts:
          if opt in ('-h', '--help'):
              usage(0)
+        elif opt in ('-a', '--extract-all'):
+            options.extractall = 1
+        elif opt in ('-d', '--default-domain'):
+            options.outfile = arg + '.pot'
+        elif opt in ('-E', '--escape'):
+            options.escape = 1
          elif opt in ('-k', '--keyword'):
              if arg is None:
                  default_keywords = []
              options.keywords.append(arg)
-        elif opt in ('-d', '--default-domain'):
-            options.outfile = arg + '.pot'
          elif opt in ('-n', '--add-location'):
              if arg is None:
                  arg = 'gnu'
@@ -287,12 +353,44 @@ def main():
                  usage(1, _('Invalid value for --add-location: %(arg)s') % d)
          elif opt in ('--no-location',):
              options.location = 0
+        elif opt in ('-o', '--output'):
+            options.outfile = arg
+        elif opt in ('-p', '--output-dir'):
+            options.outpath = arg
          elif opt in ('-v', '--verbose'):
              options.verbose = 1
+        elif opt in ('-V', '--version'):
+            print _('pygettext.py (xgettext for Python) %s') % __version__
+            sys.exit(0)
+        elif opt in ('-w', '--width'):
+            try:
+                options.width = int(arg)
+            except ValueError:
+                d = {'arg':arg}
+                usage(1, _('Invalid value for --width: %(arg)s, must be int')
+                      % d)
+        elif opt in ('-x', '--exclude-file'):
+            options.excludefilename = arg
+
+    # calculate escapes
+    make_escapes(options.escapes)
  
      # calculate all keywords
      options.keywords.extend(default_keywords)
  
+    # initialize list of strings to exclude
+    if options.excludefilename:
+        try:
+            fp = open(options.excludefilename)
+            options.toexclude = fp.readlines()
+            fp.close()
+        except IOError:
+            sys.stderr.write(_("Can't read --exclude-file: %s") %
+                             options.excludefilename)
+            sys.exit(1)
+    else:
+        options.toexclude = []
+
      # slurp through all the files
      eater = TokenEater(options)
      for filename in args:
@@ -303,6 +401,8 @@ def main():
          tokenize.tokenize(fp.readline, eater)
          fp.close()
  
+    if options.outpath:
+        options.outfile = os.path.join(options.outpath, options.outfile)
      fp = open(options.outfile, 'w')
      eater.write(fp)
      fp.close()
author	Barry Warsaw <barry@python.org>
	Sat, 26 Feb 2000 20:56:47 +0000 (20:56 +0000)
committer	Barry Warsaw <barry@python.org>
	Sat, 26 Feb 2000 20:56:47 +0000 (20:56 +0000)