#! /usr/bin/env python
# Originally written by Barry Warsaw <bwarsaw@python.org>
+#
+# minimally patched to make it even more xgettext compatible
+# by Peter Funk <pf@artcom-gmbh.de>
"""pygettext -- Python equivalent of xgettext(1)
[2] http://www.gnu.org/software/gettext/gettext.html
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
-where ever possible.
+wherever possible. However, some options are still missing or are not fully
+implemented.
Usage: pygettext [options] filename ...
--extract-all
Extract all strings
- -d default-domain
- --default-domain=default-domain
- Rename the default output file from messages.pot to default-domain.pot
+ -d name
+ --default-domain=name
+ Rename the default output file from messages.pot to name.pot
+
+ -E
+ --escape
+ replace non-ASCII characters with octal escape sequences.
+
+ -h
+ --help
+ print this help message and exit
-k [word]
--keyword[=word]
If style is omitted, Gnu is used. The style name is case
insensitive. By default, locations are included.
+ -o filename
+ --output=filename
+ Rename the default output file from messages.pot to filename.
+
+ -p dir
+ --output-dir=dir
+ Output files will be placed in directory dir.
+
-v
--verbose
Print the names of the files being processed.
- --help
- -h
- print this help message and exit
+ -V
+ --version
+ Print the version of pygettext and exit.
+
+ -w columns
+ --width=columns
+ Set width of output to columns.
+
+ -x filename
+ --exclude-file=filename
+ Specify a file that contains a list of strings that are not to be
+ extracted from the input files. Each string to be excluded must
+ appear on a line by itself in the file.
"""
import getopt
import tokenize
-__version__ = '0.2'
+__version__ = '1.0'
\f
# for selftesting
-def _(s): return s
+# Use fintl.gettext as the translation function when the fintl module can
+# be imported; otherwise fall back to an identity function.
+try:
+ import fintl
+ _ = fintl.gettext
+except ImportError:
+ def _(s): return s
# The normal pot-file header. msgmerge and EMACS' po-mode work better if
print msg
sys.exit(code)
+
\f
escapes = []
-for i in range(256):
- if i < 32 or i > 127:
- escapes.append("\\%03o" % i)
- else:
- escapes.append(chr(i))
-escapes[ord('\\')] = '\\\\'
-escapes[ord('\t')] = '\\t'
-escapes[ord('\r')] = '\\r'
-escapes[ord('\n')] = '\\n'
-escapes[ord('\"')] = '\\"'
+def make_escapes(pass_iso8859):
+    """Rebuild the global 'escapes' table that escape() indexes by ord(c).
+
+    The table has 256 entries; entry i is the text emitted for chr(i).
+
+    pass_iso8859 -- when true, characters 160..254 (those whose low seven
+    bits fall in the printable range 32..126) are emitted unchanged.
+    When false, every character outside 32..126 becomes a three-digit
+    octal escape.
+    """
+    global escapes
+    if pass_iso8859:
+        # Allow iso-8859 characters to pass through so that e.g. 'msgid
+        # "Höhe"' would not result in 'msgid "H\366he"'.  Otherwise
+        # we escape any character outside the 32..126 range.
+        mod = 128
+    else:
+        mod = 256
+    # Start from a fresh table so that a repeated call does not append a
+    # second set of 256 entries behind the first one.
+    escapes = []
+    for i in range(256):
+        # Only the range test uses the folded value; the entry itself is
+        # always built from the real character code i.
+        if 32 <= (i % mod) <= 126:
+            escapes.append(chr(i))
+        else:
+            escapes.append("\\%03o" % i)
+    # These characters have dedicated escape sequences.
+    escapes[ord('\\')] = '\\\\'
+    escapes[ord('\t')] = '\\t'
+    escapes[ord('\r')] = '\\r'
+    escapes[ord('\n')] = '\\n'
+    escapes[ord('\"')] = '\\"'
+
def escape(s):
+ global escapes
s = list(s)
for i in range(len(s)):
s[i] = escapes[ord(s[i])]
# were no strings inside _(), then just ignore this entry.
if self.__data:
msg = string.join(self.__data, '')
- entry = (self.__curfile, self.__lineno)
- linenos = self.__messages.get(msg)
- if linenos is None:
- self.__messages[msg] = [entry]
- else:
- linenos.append(entry)
+ if not msg in self.__options.toexclude:
+ entry = (self.__curfile, self.__lineno)
+ linenos = self.__messages.get(msg)
+ if linenos is None:
+ self.__messages[msg] = [entry]
+ else:
+ linenos.append(entry)
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
sys.stdout = fp
# The time stamp in the header doesn't have the same format
# as that generated by xgettext...
- print pot_header % {'time': timestamp, 'version':__version__}
+ print pot_header % {'time': timestamp, 'version': __version__}
for k, v in self.__messages.items():
- for filename, lineno in v:
- # location comments are different b/w Solaris and GNU
- d = {'filename': filename,
- 'lineno': lineno}
- if options.location == options.SOLARIS:
+ # location comments are different b/w Solaris and GNU:
+ if options.location == options.SOLARIS:
+ for filename, lineno in v:
+ d = {'filename': filename, 'lineno': lineno}
print _('# File: %(filename)s, line: %(lineno)d') % d
- elif options.location == options.GNU:
- print _('#: %(filename)s:%(lineno)d') % d
+ elif options.location == options.GNU:
+ # fit as many locations on one line as possible, as long as the
+ # resulting line length doesn't exceed 'options.width'
+ locline = '#:'
+ for filename, lineno in v:
+ d = {'filename': filename, 'lineno': lineno}
+ s = _(' %(filename)s:%(lineno)d') % d
+ if len(locline) + len(s) <= options.width:
+ locline = locline + s
+ else:
+ print locline
+ locline = "#:" + s
+ if len(locline) > 2:
+ print locline
# TBD: sorting, normalizing
print 'msgid', normalize(k)
- print 'msgstr ""'
- print
+ print 'msgstr ""\n'
finally:
sys.stdout = sys.__stdout__
try:
opts, args = getopt.getopt(
sys.argv[1:],
- 'k:d:n:hv',
- ['keyword', 'default-domain', 'help',
- 'add-location=', 'no-location', 'verbose'])
+ 'ad:Ehk:n:o:p:Vvw:x:',
+ # A long option that takes a value needs a trailing '=', otherwise
+ # getopt rejects the documented --default-domain=name and
+ # --keyword=word forms.
+ ['extract-all', 'default-domain=', 'escape', 'help', 'keyword=',
+ 'add-location', 'no-location', 'output=', 'output-dir=',
+ 'verbose', 'version', 'width=', 'exclude-file=',
+ ])
except getopt.error, msg:
usage(1, msg)
GNU = 1
SOLARIS = 2
# defaults
+ extractall = 0 # FIXME: currently this option has no effect at all.
+ escape = 0
keywords = []
+ outpath = ''
outfile = 'messages.pot'
location = GNU
verbose = 0
+ width = 78
+ excludefilename = ''
options = Options()
locations = {'gnu' : options.GNU,
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
+ elif opt in ('-a', '--extract-all'):
+ options.extractall = 1
+ elif opt in ('-d', '--default-domain'):
+ options.outfile = arg + '.pot'
+ elif opt in ('-E', '--escape'):
+ options.escape = 1
elif opt in ('-k', '--keyword'):
if arg is None:
default_keywords = []
options.keywords.append(arg)
- elif opt in ('-d', '--default-domain'):
- options.outfile = arg + '.pot'
elif opt in ('-n', '--add-location'):
if arg is None:
arg = 'gnu'
usage(1, _('Invalid value for --add-location: %(arg)s') % d)
elif opt in ('--no-location',):
options.location = 0
+ elif opt in ('-o', '--output'):
+ options.outfile = arg
+ elif opt in ('-p', '--output-dir'):
+ options.outpath = arg
elif opt in ('-v', '--verbose'):
options.verbose = 1
+ elif opt in ('-V', '--version'):
+ print _('pygettext.py (xgettext for Python) %s') % __version__
+ sys.exit(0)
+ elif opt in ('-w', '--width'):
+ try:
+ options.width = int(arg)
+ except ValueError:
+ d = {'arg':arg}
+ usage(1, _('Invalid value for --width: %(arg)s, must be int')
+ % d)
+ elif opt in ('-x', '--exclude-file'):
+ options.excludefilename = arg
+
+ # calculate escapes: -E/--escape asks for non-ASCII characters to be
+ # escaped, which is the opposite of make_escapes()'s pass-through flag.
+ # The option attribute is 'escape'; 'escapes' is the module-level table.
+ make_escapes(not options.escape)
# calculate all keywords
options.keywords.extend(default_keywords)
+ # initialize list of strings to exclude
+ if options.excludefilename:
+     try:
+         fp = open(options.excludefilename)
+         lines = fp.readlines()
+         fp.close()
+     except IOError:
+         # write() adds no newline of its own, so terminate the message.
+         sys.stderr.write(_("Can't read --exclude-file: %s") %
+                          options.excludefilename + '\n')
+         sys.exit(1)
+     # readlines() keeps each line terminator; strip it so that an entry
+     # can compare equal to an extracted message string.
+     options.toexclude = []
+     for line in lines:
+         if line[-1:] == '\n':
+             line = line[:-1]
+         options.toexclude.append(line)
+ else:
+     options.toexclude = []
+
# slurp through all the files
eater = TokenEater(options)
for filename in args:
tokenize.tokenize(fp.readline, eater)
fp.close()
+ if options.outpath:
+ options.outfile = os.path.join(options.outpath, options.outfile)
fp = open(options.outfile, 'w')
eater.write(fp)
fp.close()