From f2cc352afdde98df388a55a45f4d1fa066c2b904 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger
Date: Mon, 2 Jul 2012 13:29:57 -0700
Subject: [PATCH] Do HTML escaping after the tokenization step.

---
 Tools/scripts/pycolorize.py | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/Tools/scripts/pycolorize.py b/Tools/scripts/pycolorize.py
index 576d473f58..2278e16c27 100755
--- a/Tools/scripts/pycolorize.py
+++ b/Tools/scripts/pycolorize.py
@@ -6,22 +6,29 @@ __author__ = 'Raymond Hettinger'
 
 import keyword, tokenize, cgi, functools
 
-def insert(s, i, text):
-    'Insert text at position i in string s'
-    return s[:i] + text + s[i:]
-
 def is_builtin(s):
     'Return True if s is the name of a builtin'
     return s in vars(__builtins__)
 
+def escape_range(lines, start, end):
+    'Return escaped content from a range of lines between start and end'
+    (srow, scol), (erow, ecol) = start, end
+    if srow == erow:
+        rows = [lines[srow-1][scol:ecol]]
+    else:
+        rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]]
+    return cgi.escape(''.join(rows)), end
+
 def colorize(source):
     'Convert Python source code to an HTML fragment with colorized markup'
-    text = cgi.escape(source)
-    lines = text.splitlines(True)
+    lines = source.splitlines(True)
+    lines.append('')
     readline = functools.partial(next, iter(lines), '')
     actions = []
     kind = tok_str = ''
     tok_type = tokenize.COMMENT
+    written = (1, 0)
+    result = []
     for tok in tokenize.generate_tokens(readline):
         prev_tok_type, prev_tok_str = tok_type, tok_str
         tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok
@@ -44,15 +51,17 @@ def colorize(source):
             elif is_builtin(tok_str) and prev_tok_str != '.':
                 kind = 'builtin'
         if kind:
-            actions.append(((srow, scol), (erow, ecol), kind))
-
-    for (srow, scol), (erow, ecol), kind in reversed(actions):
-        lines[erow-1] = insert(lines[erow-1], ecol, '</span>')
-        lines[srow-1] = insert(lines[srow-1], scol, '<span class="%s">' % kind)
+            line_upto_token, written = escape_range(lines, written, (srow, scol))
+            line_thru_token, written = escape_range(lines, written, (erow, ecol))
+            result += [line_upto_token, '<span class="%s">' % kind,
+                       line_thru_token, '</span>']
+        else:
+            line_thru_token, written = escape_range(lines, written, (erow, ecol))
+            result += [line_thru_token]
 
-    lines.insert(0, '<pre class="python">\n')
-    lines.append('</pre>\n')
-    return ''.join(lines)
+    result.insert(0, '<pre class="python">\n')
+    result.append('</pre>\n')
+    return ''.join(result)
 
 default_css = {
     '.comment': '{color: crimson;}',
-- 
2.40.0