filters: import more modern scripts

author Jason A. Donenfeld <Jason@zx2c4.com>

Mon, 27 May 2013 19:39:43 +0000 (21:39 +0200)

committer Jason A. Donenfeld <Jason@zx2c4.com>

Mon, 27 May 2013 19:54:16 +0000 (21:54 +0200)
author Jason A. Donenfeld <Jason@zx2c4.com>
Mon, 27 May 2013 19:39:43 +0000 (21:39 +0200)
committer Jason A. Donenfeld <Jason@zx2c4.com>
Mon, 27 May 2013 19:54:16 +0000 (21:54 +0200)
diff --git a/Makefile b/Makefile

index 4b83ac71f09172b11f35f6cb33465092fb4ecdcf..4df95b4ed99a8734b74d57976321f04dcbdb63ba 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -17,6 +17,7 @@ SHA1_HEADER = <openssl/sha.h>
  GIT_VER = 1.8.3
  GIT_URL = https://git-core.googlecode.com/files/git-$(GIT_VER).tar.gz
  INSTALL = install
+COPYTREE = cp -r
  MAN5_TXT = $(wildcard *.5.txt)
  MAN_TXT  = $(MAN5_TXT)
  DOC_MAN5 = $(patsubst %.txt,%,$(MAN5_TXT))
@@ -77,7 +78,7 @@ install: all
         $(INSTALL) -m 0644 cgit.css $(DESTDIR)$(CGIT_DATA_PATH)/cgit.css
         $(INSTALL) -m 0644 cgit.png $(DESTDIR)$(CGIT_DATA_PATH)/cgit.png
         $(INSTALL) -m 0755 -d $(DESTDIR)$(filterdir)
-       $(INSTALL) -m 0755 filters/* $(DESTDIR)$(filterdir)
+       $(COPYTREE)  filters/* $(DESTDIR)$(filterdir)
  
  install-doc: install-man install-html install-pdf
  
diff --git a/cgit.css b/cgit.css

index a50d62b01d05c54e268f1c40e51ba8449818132d..d467c662d0bf5cf92ce3f89cf45312cddae8371e 100644 (file)
--- a/cgit.css
+++ b/cgit.css
@@ -800,17 +800,3 @@ div#cgit table.ssdiff td.space {
  div#cgit table.ssdiff td.space div {
         min-height: 3em;
  }
-
-/* Syntax highlighting */
-div#cgit table.blob .num  { color:#2928ff; }
-div#cgit table.blob .esc  { color:#ff00ff; }
-div#cgit table.blob .str  { color:#ff0000; }
-div#cgit table.blob .dstr { color:#818100; }
-div#cgit table.blob .slc  { color:#838183; font-style:italic; }
-div#cgit table.blob .com  { color:#838183; font-style:italic; }
-div#cgit table.blob .dir  { color:#008200; }
-div#cgit table.blob .sym  { color:#000000; }
-div#cgit table.blob .kwa  { color:#000000; font-weight:bold; }
-div#cgit table.blob .kwb  { color:#830000; }
-div#cgit table.blob .kwc  { color:#000000; font-weight:bold; }
-div#cgit table.blob .kwd  { color:#010181; }
diff --git a/filters/about-formatting.sh b/filters/about-formatting.sh

new file mode 100755 (executable)

index 0000000..313a4e6
--- /dev/null
+++ b/filters/about-formatting.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+# This may be used with the about-filter or repo.about-filter setting in cgitrc.
+# It passes formatting of about pages to differing programs, depending on the usage.
+
+# Markdown support requires perl.
+# RestructuredText support requires python and docutils.
+# Man page support requires groff.
+
+# The following environment variables can be used to retrieve the configuration
+# of the repository for which this script is called:
+# CGIT_REPO_URL        ( = repo.url       setting )
+# CGIT_REPO_NAME       ( = repo.name      setting )
+# CGIT_REPO_PATH       ( = repo.path      setting )
+# CGIT_REPO_OWNER      ( = repo.owner     setting )
+# CGIT_REPO_DEFBRANCH  ( = repo.defbranch setting )
+# CGIT_REPO_SECTION    ( = section        setting )
+# CGIT_REPO_CLONE_URL  ( = repo.clone-url setting )
+
+cd "$(dirname $0)/html-converters/"
+case "$(tr '[:upper:]' '[:lower:]' <<<"$1")" in
+       *.md|*.mkd) exec ./md2html; ;;
+       *.rst) exec ./rst2html; ;;
+       *.[1-9]) exec ./man2html; ;;
+       *.htm|*.html) exec cat; ;;
+       *.txt|*) exec ./txt2html; ;;
+esac
diff --git a/filters/html-converters/man2html b/filters/html-converters/man2html

new file mode 100755 (executable)

index 0000000..1b28437
--- /dev/null
+++ b/filters/html-converters/man2html
@@ -0,0 +1,5 @@
+#!/bin/sh
+echo "<div style=\"font-family: monospace\">"  # open a wrapper that forces a monospace font
+groff -mandoc -T html -P -r -P -l | grep -E -v '(<html>|<head>|<meta|<title>|</title>|</head>|<body>|</body>|</html>|<!DOCTYPE|"http://www.w3.org)'  # drop the lines carrying groff's document-level tags; grep -E replaces the obsolescent egrep
+echo "</div>"
+
diff --git a/filters/html-converters/md2html b/filters/html-converters/md2html

new file mode 100755 (executable)

index 0000000..5cab749
--- /dev/null
+++ b/filters/html-converters/md2html
@@ -0,0 +1,2 @@
+#!/bin/sh
+exec "$(dirname "$0")/resources/markdown.pl"  # hand over to markdown.pl in resources/ next to this script; stdin and stdout are inherited
diff --git a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl

new file mode 100755 (executable)

index 0000000..abec173
--- /dev/null
+++ b/filters/html-converters/resources/markdown.pl
@@ -0,0 +1,1731 @@
+#!/usr/bin/perl
+
+#
+# Markdown -- A text-to-HTML conversion tool for web writers
+#
+# Copyright (c) 2004 John Gruber
+# <http://daringfireball.net/projects/markdown/>
+#
+
+
+package Markdown;
+require 5.006_000;
+use strict;
+use warnings;
+
+use Digest::MD5 qw(md5_hex);
+# Package version, reported by --version/--shortversion and in the MT plugin description.
+our $VERSION = '1.0.1';
+# Tue 14 Dec 2004
+
+## Disabled: the input is read as raw bytes, so a :utf8 layer on STDOUT would UTF-8-encode every non-ASCII byte a second time.
+# use utf8;
+# binmode( STDOUT, ":utf8" );  # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
+
+
+#
+# Global default settings:
+#
+my $g_empty_element_suffix = " />";     # Closes empty elements such as <hr> and <br>; change to ">" for HTML output
+my $g_tab_width = 4;     # one less than this ($less_than_tab) is the most leading spaces allowed before a link definition or a raw <hr>
+
+
+#
+# Globals:
+#
+
+# Regex to match balanced [brackets]. See Friedl's
+# "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
+my $g_nested_brackets;     # declared before it is assigned so that the (??{ }) block below can refer to it
+$g_nested_brackets = qr{
+       (?>                                                             # Atomic matching
+          [^\[\]]+                                                     # Anything other than brackets
+        | 
+          \[
+                (??{ $g_nested_brackets })             # Recursive set of nested brackets
+          \]
+       )*
+}x;
+
+
+# Table mapping each backslash-escapable character to its MD5 hex digest:
+my %g_escape_table = map {
+       ( $_ => md5_hex($_) )
+} split //, '\\`*_{}[]()>#+-.!';
+
+
+# Global hashes, shared by the utility routines:
+# link URLs and link titles (both keyed by lower-cased link id), and
+# hashed-away HTML blocks (keyed by the MD5 hex digest of the block).
+my ( %g_urls, %g_titles, %g_html_blocks );
+
+# Nesting depth of the list currently being processed; 0 means we are
+# not inside an ordered or unordered list (see _ProcessListItems()):
+my $g_list_level = 0;
+
+
+
+#### Blosxom plug-in interface ##########################################
+
+# With $g_blosxom_use_meta set to 1, Blosxom's meta plug-in decides which
+# posts get processed: only those carrying a "meta-markup: markdown"
+# header. With 0 (the default), every entry goes through Markdown.
+my $g_blosxom_use_meta = 0;
+
+sub start { return 1; }   # always true
+
+# Rewrites the story body in place (via $body_ref); always returns true.
+sub story {
+       my ($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
+
+       my $is_markdown_entry = !$g_blosxom_use_meta
+               || ( defined($meta::markup) && $meta::markup =~ /^\s*markdown\s*$/i );
+       $$body_ref = Markdown($$body_ref) if $is_markdown_entry;
+
+       return 1;
+}
+
+
+#### Movable Type plug-in interface #####################################
+eval {require MT};  # Test to see if we're running in MT.
+unless ($@) {
+       require MT;
+       MT->import;
+       require MT::Template::Context;
+       MT::Template::Context->import;
+
+       eval {require MT::Plugin};  # Test to see if we're running >= MT 3.0.
+       unless ($@) {
+               require MT::Plugin;
+               MT::Plugin->import;
+               my $plugin = MT::Plugin->new({
+                       name => "Markdown",
+                       description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
+                       doc_link => 'http://daringfireball.net/projects/markdown/'
+               });
+               MT->add_plugin( $plugin );
+       }
+       # MarkdownOptions container tag: stashes the lower-cased "output" argument, then builds and returns the tag's contents.
+       MT::Template::Context->add_container_tag(MarkdownOptions => sub {
+               my $ctx  = shift;
+               my $args = shift;
+               my $builder = $ctx->stash('builder');
+               my $tokens = $ctx->stash('tokens');
+
+               if (defined ($args->{'output'}) ) {
+                       $ctx->stash('markdown_output', lc $args->{'output'});
+               }
+
+               defined (my $str = $builder->build($ctx, $tokens) )
+                       or return $ctx->error($builder->errstr);
+               $str;           # return value
+       });
+       # 'markdown' text filter: the stashed 'markdown_output' selects ">"-style empty tags (/^html/i), no conversion at all ('raw'), or the default " />" style.
+       MT->add_text_filter('markdown' => {
+               label     => 'Markdown',
+               docs      => 'http://daringfireball.net/projects/markdown/',
+               on_format => sub {
+                       my $text = shift;
+                       my $ctx  = shift;
+                       my $raw  = 0;
+                       if (defined $ctx) {
+                               my $output = $ctx->stash('markdown_output');
+                               if (defined $output  &&  $output =~ m/^html/i) {
+                                       $g_empty_element_suffix = ">";
+                                       $ctx->stash('markdown_output', '');
+                               }
+                               elsif (defined $output  &&  $output eq 'raw') {
+                                       $raw = 1;
+                                       $ctx->stash('markdown_output', '');
+                               }
+                               else {
+                                       $raw = 0;
+                                       $g_empty_element_suffix = " />";
+                               }
+                       }
+                       $text = $raw ? $text : Markdown($text);
+                       $text;
+               },
+       });
+
+       # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
+       my $smartypants;
+
+       {
+               no warnings "once";
+               $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
+       }
+
+       if ($smartypants) {
+               MT->add_text_filter('markdown_with_smartypants' => {
+                       label     => 'Markdown With SmartyPants',
+                       docs      => 'http://daringfireball.net/projects/markdown/',
+                       on_format => sub {
+                               my $text = shift;
+                               my $ctx  = shift;
+                               if (defined $ctx) {
+                                       my $output = $ctx->stash('markdown_output');
+                                       if (defined $output  &&  $output eq 'html') {
+                                               $g_empty_element_suffix = ">";
+                                       }
+                                       else {
+                                               $g_empty_element_suffix = " />";
+                                       }
+                               }
+                               $text = Markdown($text);
+                               $text = $smartypants->($text, '1');
+                       },
+               });
+       }
+}
+else {
+#### BBEdit/command-line text filter interface ##########################
+# Needs to be hidden from MT (and Blosxom when running in static mode).
+# This branch runs when MT could not be loaded; it does nothing if $blosxom::version is defined.
+    # We're only using $blosxom::version once; tell Perl not to warn us:
+       no warnings 'once';
+    unless ( defined($blosxom::version) ) {
+               use warnings;
+
+               #### Check for command-line switches: #################
+               my %cli_opts;
+               use Getopt::Long;
+               Getopt::Long::Configure('pass_through');
+               GetOptions(\%cli_opts,
+                       'version',
+                       'shortversion',
+                       'html4tags',
+               );
+               if ($cli_opts{'version'}) {             # Version info
+                       print "\nThis is Markdown, version $VERSION.\n";
+                       print "Copyright 2004 John Gruber\n";
+                       print "http://daringfireball.net/projects/markdown/\n\n";
+                       exit 0;
+               }
+               if ($cli_opts{'shortversion'}) {                # Just the version number string.
+                       print $VERSION;
+                       exit 0;
+               }
+               if ($cli_opts{'html4tags'}) {                   # Use HTML tag style instead of XHTML
+                       $g_empty_element_suffix = ">";
+               }
+
+               # From here on the script acts as a filter: slurp the input, then print a <style> block followed by the converted HTML.
+               #### Process incoming text: ###########################
+               my $text;
+               {
+                       local $/;               # Slurp the whole file
+                       $text = <>;             # the files named in @ARGV, or STDIN when there are none
+               }
+       print <<'EOT';
+<style>
+.markdown-body {
+    font-size: 14px;
+    line-height: 1.6;
+    overflow: hidden;
+}
+.markdown-body>*:first-child {
+    margin-top: 0 !important;
+}
+.markdown-body>*:last-child {
+    margin-bottom: 0 !important;
+}
+.markdown-body a.absent {
+    color: #c00;
+}
+.markdown-body a.anchor {
+    display: block;
+    padding-left: 30px;
+    margin-left: -30px;
+    cursor: pointer;
+    position: absolute;
+    top: 0;
+    left: 0;
+    bottom: 0;
+}
+.markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 {
+    margin: 20px 0 10px;
+    padding: 0;
+    font-weight: bold;
+    -webkit-font-smoothing: antialiased;
+    cursor: text;
+    position: relative;
+}
+.markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link {
+    display: none;
+    color: #000;
+}
+.markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor {
+    text-decoration: none;
+    line-height: 1;
+    padding-left: 0;
+    margin-left: -22px;
+    top: 15%}
+.markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link {
+    display: inline-block;
+}
+.markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code {
+    font-size: inherit;
+}
+.markdown-body h1 {
+    font-size: 28px;
+    color: #000;
+}
+.markdown-body h2 {
+    font-size: 24px;
+    border-bottom: 1px solid #ccc;
+    color: #000;
+}
+.markdown-body h3 {
+    font-size: 18px;
+}
+.markdown-body h4 {
+    font-size: 16px;
+}
+.markdown-body h5 {
+    font-size: 14px;
+}
+.markdown-body h6 {
+    color: #777;
+    font-size: 14px;
+}
+.markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre {
+    margin: 15px 0;
+}
+.markdown-body hr {
+    background: transparent url("/dirty-shade.png") repeat-x 0 0;
+    border: 0 none;
+    color: #ccc;
+    height: 4px;
+    padding: 0;
+}
+.markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child {
+    margin-top: 0;
+    padding-top: 0;
+}
+.markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 {
+    margin-top: 0;
+    padding-top: 0;
+}
+.markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p {
+    margin-top: 0;
+}
+.markdown-body li p.first {
+    display: inline-block;
+}
+.markdown-body ul, .markdown-body ol {
+    padding-left: 30px;
+}
+.markdown-body ul.no-list, .markdown-body ol.no-list {
+    list-style-type: none;
+    padding: 0;
+}
+.markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type {
+    margin-top: 0px;
+}
+.markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type {
+    margin-bottom: 0;
+}
+.markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul {
+    margin-bottom: 0;
+}
+.markdown-body dl {
+    padding: 0;
+}
+.markdown-body dl dt {
+    font-size: 14px;
+    font-weight: bold;
+    font-style: italic;
+    padding: 0;
+    margin: 15px 0 5px;
+}
+.markdown-body dl dt:first-child {
+    padding: 0;
+}
+.markdown-body dl dt>:first-child {
+    margin-top: 0px;
+}
+.markdown-body dl dt>:last-child {
+    margin-bottom: 0px;
+}
+.markdown-body dl dd {
+    margin: 0 0 15px;
+    padding: 0 15px;
+}
+.markdown-body dl dd>:first-child {
+    margin-top: 0px;
+}
+.markdown-body dl dd>:last-child {
+    margin-bottom: 0px;
+}
+.markdown-body blockquote {
+    border-left: 4px solid #DDD;
+    padding: 0 15px;
+    color: #777;
+}
+.markdown-body blockquote>:first-child {
+    margin-top: 0px;
+}
+.markdown-body blockquote>:last-child {
+    margin-bottom: 0px;
+}
+.markdown-body table th {
+    font-weight: bold;
+}
+.markdown-body table th, .markdown-body table td {
+    border: 1px solid #ccc;
+    padding: 6px 13px;
+}
+.markdown-body table tr {
+    border-top: 1px solid #ccc;
+    background-color: #fff;
+}
+.markdown-body table tr:nth-child(2n) {
+    background-color: #f8f8f8;
+}
+.markdown-body img {
+    max-width: 100%;
+    -moz-box-sizing: border-box;
+    box-sizing: border-box;
+}
+.markdown-body span.frame {
+    display: block;
+    overflow: hidden;
+}
+.markdown-body span.frame>span {
+    border: 1px solid #ddd;
+    display: block;
+    float: left;
+    overflow: hidden;
+    margin: 13px 0 0;
+    padding: 7px;
+    width: auto;
+}
+.markdown-body span.frame span img {
+    display: block;
+    float: left;
+}
+.markdown-body span.frame span span {
+    clear: both;
+    color: #333;
+    display: block;
+    padding: 5px 0 0;
+}
+.markdown-body span.align-center {
+    display: block;
+    overflow: hidden;
+    clear: both;
+}
+.markdown-body span.align-center>span {
+    display: block;
+    overflow: hidden;
+    margin: 13px auto 0;
+    text-align: center;
+}
+.markdown-body span.align-center span img {
+    margin: 0 auto;
+    text-align: center;
+}
+.markdown-body span.align-right {
+    display: block;
+    overflow: hidden;
+    clear: both;
+}
+.markdown-body span.align-right>span {
+    display: block;
+    overflow: hidden;
+    margin: 13px 0 0;
+    text-align: right;
+}
+.markdown-body span.align-right span img {
+    margin: 0;
+    text-align: right;
+}
+.markdown-body span.float-left {
+    display: block;
+    margin-right: 13px;
+    overflow: hidden;
+    float: left;
+}
+.markdown-body span.float-left span {
+    margin: 13px 0 0;
+}
+.markdown-body span.float-right {
+    display: block;
+    margin-left: 13px;
+    overflow: hidden;
+    float: right;
+}
+.markdown-body span.float-right>span {
+    display: block;
+    overflow: hidden;
+    margin: 13px auto 0;
+    text-align: right;
+}
+.markdown-body code, .markdown-body tt {
+    margin: 0 2px;
+    padding: 0px 5px;
+    border: 1px solid #eaeaea;
+    background-color: #f8f8f8;
+    border-radius: 3px;
+}
+.markdown-body code {
+    white-space: nowrap;
+}
+.markdown-body pre>code {
+    margin: 0;
+    padding: 0;
+    white-space: pre;
+    border: none;
+    background: transparent;
+}
+.markdown-body .highlight pre, .markdown-body pre {
+    background-color: #f8f8f8;
+    border: 1px solid #ccc;
+    font-size: 13px;
+    line-height: 19px;
+    overflow: auto;
+    padding: 6px 10px;
+    border-radius: 3px;
+}
+.markdown-body pre code, .markdown-body pre tt {
+    margin: 0;
+    padding: 0;
+    background-color: transparent;
+    border: none;
+}
+</style>
+EOT
+       print "<div class='markdown-body'>";  # wrapper element that the .markdown-body style rules printed above select
+        print Markdown($text);  # the converted document
+       print "</div>";
+    }
+}
+
+
+
+sub Markdown {
+#
+# Main function: takes Markdown source text as its only argument and
+# returns the HTML. The order of the stages below is essential. Link
+# and image substitutions need to happen before _EscapeSpecialChars(),
+# so that any *'s or _'s in the <a> and <img> tags get encoded.
+#
+       my $text = shift;
+
+       # Start every call with empty global hashes. Leftovers from another
+       # article would otherwise conflict when one page is generated from
+       # several articles (e.g. an index page showing the N most recent):
+       %$_ = () for ( \%g_urls, \%g_titles, \%g_html_blocks );
+
+
+       # Standardize line endings:
+       $text =~ s{\r\n}{\n}g;  # DOS to Unix
+       $text =~ s{\r}{\n}g;    # Mac to Unix
+
+       # Make sure $text ends with a couple of newlines:
+       $text = $text . "\n\n";
+
+       # Convert all tabs to spaces.
+       $text = _Detab($text);
+
+       # Blank out lines that contain nothing but spaces and tabs, so that
+       # the later patterns can match consecutive blank lines with /\n+/
+       # rather than something contorted like /[ \t]*\n+/ .
+       $text =~ s/^[ \t]+$//mg;
+
+       # The remaining stages each take the text and return the new text.
+       # A named loop variable is used so that no stage can clobber $_.
+       my @stages = (
+               \&_HashHTMLBlocks,              # block-level HTML blocks -> hash entries
+               \&_StripLinkDefinitions,        # link definitions -> hashes
+               \&_RunBlockGamut,
+               \&_UnescapeSpecialChars,
+       );
+
+       for my $stage (@stages) {
+               $text = $stage->($text);
+       }
+
+
+       return $text . "\n";
+}
+
+
+sub _StripLinkDefinitions {
+#
+# Strips link definitions from text and returns what is left. URLs go into
+# %g_urls and titles into %g_titles, both keyed by the lower-cased link id.
+#
+       my $text = shift;
+       my $less_than_tab = $g_tab_width - 1;
+
+       # Link defs are in the form: ^[id]: url "optional title"
+       while ($text =~ s{
+                                               ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1
+                                                 [ \t]*
+                                                 \n?                           # maybe *one* newline
+                                                 [ \t]*
+                                               <?(\S+?)>?                      # url = $2
+                                                 [ \t]*
+                                                 \n?                           # maybe one newline
+                                                 [ \t]*
+                                               (?:
+                                                       (?<=\s)                 # lookbehind for whitespace
+                                                       ["(]
+                                                       (.+?)                   # title = $3
+                                                       [")]
+                                                       [ \t]*
+                                               )?      # title is optional
+                                               (?:\n+|\Z)
+                                       }
+                                       {}mx) {
+               my ($link_id, $url, $title) = (lc($1), $2, $3);         # Link IDs are case-insensitive
+               $g_urls{$link_id} = _EncodeAmpsAndAngles( $url );
+               if (defined $title) {           # defined rather than true, so that a title of "0" is kept
+                       $title =~ s/"/&quot;/g;
+                       $g_titles{$link_id} = $title;
+               }
+       }
+       return $text;
+}
+
+
+sub _HashHTMLBlocks {
+       my $text = shift;
+       my $less_than_tab = $g_tab_width - 1;
+       # Swaps each raw block-level HTML chunk for "\n\n<MD5 hex of the chunk>\n\n", keeps the chunk in %g_html_blocks under that key, and returns the rewritten text.
+       # Hashify HTML blocks:
+       # We only want to do this for block-level HTML tags, such as headers,
+       # lists, and tables. That's because we still want to wrap <p>s around
+       # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+       # phrase emphasis, and spans. The list of tags we're looking for is
+       # hard-coded:
+       my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
+       my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;  # same list, minus ins|del
+
+       # First, look for nested blocks, e.g.:
+       #       <div>
+       #               <div>
+       #               tags for inner block must be indented.
+       #               </div>
+       #       </div>
+       #
+       # The outermost tags must start at the left margin for this to match, and
+       # the inner nested divs must be indented.
+       # We need to do this before the next, more liberal match, because the next
+       # match will start at the first `<div>` and stop at the first `</div>`.
+       $text =~ s{
+                               (                                               # save in $1
+                                       ^                                       # start of line  (with /m)
+                                       <($block_tags_a)        # start tag = $2
+                                       \b                                      # word break
+                                       (.*\n)*?                        # any number of lines, minimally matching
+                                       </\2>                           # the matching end tag
+                                       [ \t]*                          # trailing spaces/tabs
+                                       (?=\n+|\Z)      # followed by a newline or end of document
+                               )
+                       }{
+                               my $key = md5_hex($1);
+                               $g_html_blocks{$key} = $1;
+                               "\n\n" . $key . "\n\n";
+                       }egmx;
+
+
+       #
+       # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
+       #
+       $text =~ s{
+                               (                                               # save in $1
+                                       ^                                       # start of line  (with /m)
+                                       <($block_tags_b)        # start tag = $2
+                                       \b                                      # word break
+                                       (.*\n)*?                        # any number of lines, minimally matching
+                                       .*</\2>                         # the matching end tag
+                                       [ \t]*                          # trailing spaces/tabs
+                                       (?=\n+|\Z)      # followed by a newline or end of document
+                               )
+                       }{
+                               my $key = md5_hex($1);
+                               $g_html_blocks{$key} = $1;
+                               "\n\n" . $key . "\n\n";
+                       }egmx;
+       # Special case just for <hr />. It was easier to make a special case than
+       # to make the other regex more complicated.
+       $text =~ s{
+                               (?:
+                                       (?<=\n\n)               # Starting after a blank line
+                                       |                               # or
+                                       \A\n?                   # the beginning of the doc
+                               )
+                               (                                               # save in $1
+                                       [ ]{0,$less_than_tab}
+                                       <(hr)                           # start tag = $2
+                                       \b                                      # word break
+                                       ([^<>])*?                       # 
+                                       /?>                                     # the matching end tag
+                                       [ \t]*
+                                       (?=\n{2,}|\Z)           # followed by a blank line or end of document
+                               )
+                       }{
+                               my $key = md5_hex($1);
+                               $g_html_blocks{$key} = $1;
+                               "\n\n" . $key . "\n\n";
+                       }egx;
+
+       # Special case for standalone HTML comments:
+       $text =~ s{
+                               (?:
+                                       (?<=\n\n)               # Starting after a blank line
+                                       |                               # or
+                                       \A\n?                   # the beginning of the doc
+                               )
+                               (                                               # save in $1
+                                       [ ]{0,$less_than_tab}
+                                       (?s:
+                                               <!
+                                               (--.*?--\s*)+
+                                               >
+                                       )
+                                       [ \t]*
+                                       (?=\n{2,}|\Z)           # followed by a blank line or end of document
+                               )
+                       }{
+                               my $key = md5_hex($1);
+                               $g_html_blocks{$key} = $1;
+                               "\n\n" . $key . "\n\n";
+                       }egx;
+
+
+       return $text;
+}
+
+
+sub _RunBlockGamut {
+#
+# Runs, in a fixed order, all the transformations that produce
+# block-level tags such as headers, paragraphs and list items.
+#
+       my $text = shift;
+
+       $text = _DoHeaders($text);
+
+       # Horizontal rules: lines made of three or more *, - or _ characters.
+       $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
+       $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
+       $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
+
+       # Lists, then code blocks, then block quotes:
+       for my $stage ( \&_DoLists, \&_DoCodeBlocks, \&_DoBlockQuotes ) {
+               $text = $stage->($text);
+       }
+
+       # _HashHTMLBlocks() already ran once, in Markdown(), to escape the
+       # raw HTML of the original Markdown source. Running it again here
+       # hides the markup generated above, so that _FormParagraphs() does
+       # not wrap <p> tags around block-level tags.
+       #
+       $text = _HashHTMLBlocks($text);
+
+       $text = _FormParagraphs($text);
+
+       return $text;
+}
+
+
+sub _RunSpanGamut {
+#
+# Runs, in a fixed order, all the transformations that apply *inside*
+# block-level tags such as paragraphs, headers and list items.
+#
+       my $text = shift;
+
+       my @span_stages = (
+               \&_DoCodeSpans,
+               \&_EscapeSpecialChars,
+
+               # Images go before anchors, because ![foo][f] looks like an anchor.
+               \&_DoImages,
+               \&_DoAnchors,
+
+               # Auto-links such as `<http://example.com/>` go after anchors,
+               # because inline links may use < and > delimiters: [this](<url>).
+               \&_DoAutoLinks,
+
+               \&_EncodeAmpsAndAngles,
+               \&_DoItalicsAndBold,
+       );
+       for my $stage (@span_stages) {
+               $text = $stage->($text);
+       }
+
+       # Do hard breaks:
+       $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
+       return $text;
+}
+
+
+sub _EscapeSpecialChars {
+#
+# Returns $text rebuilt from its HTML tokens: * and _ inside tags are
+# replaced by their %g_escape_table values, and the text between tags
+# is passed through _EncodeBackslashEscapes().
+#
+       my $text = shift;
+
+       # Each token is used as a pair: element 0 is the type ("tag" or
+       # anything else), element 1 is the piece of text it covers.
+       my $tokens = _TokenizeHTML($text);
+
+       my $escaped = '';   # the text, rebuilt from the tokens
+       foreach my $cur_token (@$tokens) {
+               my ($type, $chunk) = @$cur_token;
+               if ($type eq "tag") {
+                       # Within tags, encode * and _ so they don't conflict
+                       # with their use in Markdown for italics and strong.
+                       $chunk =~ s! \* !$g_escape_table{'*'}!gx;
+                       $chunk =~ s! _  !$g_escape_table{'_'}!gx;
+               } else {
+                       $chunk = _EncodeBackslashEscapes($chunk);
+               }
+               $escaped .= $chunk;
+       }
+       return $escaped;
+}
+
+
+sub _DoAnchors {
+#
+# Turn Markdown link shortcuts into XHTML <a> tags.
+#
+# Parameter:  String of span-level text.
+# Returns:    The string with reference-style links ([text][id]) and
+#             inline-style links ([text](url "title")) replaced by <a>
+#             elements. A reference-style link whose id has no entry in
+#             %g_urls is left exactly as it was written.
+#
+       my $text = shift;
+
+       #
+       # First, handle reference-style links: [link text] [id]
+       #
+       $text =~ s{
+               (                                       # wrap whole match in $1
+                 \[
+                   ($g_nested_brackets)        # link text = $2
+                 \]
+
+                 [ ]?                          # one optional space
+                 (?:\n[ ]*)?           # one optional newline followed by spaces
+
+                 \[
+                   (.*?)               # id = $3
+                 \]
+               )
+       }{
+               my $result;
+               my $whole_match = $1;
+               my $link_text   = $2;
+               # The id is lower-cased before it is looked up in %g_urls / %g_titles.
+               my $link_id     = lc $3;
+
+               if ($link_id eq "") {
+                       $link_id = lc $link_text;     # for shortcut links like [this][].
+               }
+
+               if (defined $g_urls{$link_id}) {
+                       my $url = $g_urls{$link_id};
+                       $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
+                       $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
+                       $result = "<a href=\"$url\"";
+                       if ( defined $g_titles{$link_id} ) {
+                               my $title = $g_titles{$link_id};
+                               $title =~ s! \* !$g_escape_table{'*'}!gx;
+                               $title =~ s!  _ !$g_escape_table{'_'}!gx;
+                               $result .=  " title=\"$title\"";
+                       }
+                       $result .= ">$link_text</a>";
+               }
+               else {
+                       # Unknown id: put the original text back unchanged.
+                       $result = $whole_match;
+               }
+               $result;
+       }xsge;
+
+       #
+       # Next, inline-style links: [link text](url "optional title")
+       #
+       $text =~ s{
+               (                               # wrap whole match in $1
+                 \[
+                   ($g_nested_brackets)        # link text = $2
+                 \]
+                 \(                    # literal paren
+                       [ \t]*
+                       <?(.*?)>?       # href = $3
+                       [ \t]*
+                       (                       # $4
+                         (['"])        # quote char = $5
+                         (.*?)         # Title = $6
+                         \5            # matching quote
+                       )?                      # title is optional
+                 \)
+               )
+       }{
+               my $result;
+               # $whole_match is captured for symmetry with the block above but
+               # is not used: an inline link always produces an <a> element.
+               my $whole_match = $1;
+               my $link_text   = $2;
+               my $url                 = $3;
+               # $6 is undef when the optional title group did not take part in the match.
+               my $title               = $6;
+
+               $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
+               $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
+               $result = "<a href=\"$url\"";
+
+               if (defined $title) {
+                       # The title goes inside a double-quoted attribute value.
+                       $title =~ s/"/&quot;/g;
+                       $title =~ s! \* !$g_escape_table{'*'}!gx;
+                       $title =~ s!  _ !$g_escape_table{'_'}!gx;
+                       $result .=  " title=\"$title\"";
+               }
+
+               $result .= ">$link_text</a>";
+
+               $result;
+       }xsge;
+
+       return $text;
+}
+
+
+sub _DoImages {
+#
+# Turn Markdown image shortcuts into <img> tags.
+#
+# Parameter:  String of span-level text.
+# Returns:    The string with reference-style images (![alt][id]) and
+#             inline images (![alt](url "title")) replaced by <img>
+#             elements. A reference-style image whose id has no entry in
+#             %g_urls is left exactly as it was written. In both styles a
+#             title attribute is emitted only when a title was supplied.
+#
+       my $text = shift;
+
+       #
+       # First, handle reference-style labeled images: ![alt text][id]
+       #
+       $text =~ s{
+               (                               # wrap whole match in $1
+                 !\[
+                   (.*?)               # alt text = $2
+                 \]
+
+                 [ ]?                          # one optional space
+                 (?:\n[ ]*)?           # one optional newline followed by spaces
+
+                 \[
+                   (.*?)               # id = $3
+                 \]
+
+               )
+       }{
+               my $result;
+               my $whole_match = $1;
+               my $alt_text    = $2;
+               my $link_id     = lc $3;
+
+               if ($link_id eq "") {
+                       $link_id = lc $alt_text;     # for shortcut links like ![this][].
+               }
+
+               $alt_text =~ s/"/&quot;/g;
+               if (defined $g_urls{$link_id}) {
+                       my $url = $g_urls{$link_id};
+                       $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
+                       $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
+                       $result = "<img src=\"$url\" alt=\"$alt_text\"";
+                       if (defined $g_titles{$link_id}) {
+                               my $title = $g_titles{$link_id};
+                               $title =~ s! \* !$g_escape_table{'*'}!gx;
+                               $title =~ s!  _ !$g_escape_table{'_'}!gx;
+                               $result .=  " title=\"$title\"";
+                       }
+                       $result .= $g_empty_element_suffix;
+               }
+               else {
+                       # If there's no such link ID, leave intact:
+                       $result = $whole_match;
+               }
+
+               $result;
+       }xsge;
+
+       #
+       # Next, handle inline images:  ![alt text](url "optional title")
+       # Don't forget: encode * and _
+
+       $text =~ s{
+               (                               # wrap whole match in $1
+                 !\[
+                   (.*?)               # alt text = $2
+                 \]
+                 \(                    # literal paren
+                       [ \t]*
+                       <?(\S+?)>?      # src url = $3
+                       [ \t]*
+                       (                       # $4
+                         (['"])        # quote char = $5
+                         (.*?)         # title = $6
+                         \5            # matching quote
+                         [ \t]*
+                       )?                      # title is optional
+                 \)
+               )
+       }{
+               my $result;
+               my $alt_text    = $2;
+               my $url         = $3;
+               # $6 is undef when the optional title group did not take part in
+               # the match. Keeping it undef (rather than defaulting to '') is
+               # what lets the defined() test below skip the attribute.
+               my $title       = $6;
+
+               $alt_text =~ s/"/&quot;/g;
+               $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
+               $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
+               $result = "<img src=\"$url\" alt=\"$alt_text\"";
+               if (defined $title) {
+                       $title =~ s/"/&quot;/g;
+                       $title =~ s! \* !$g_escape_table{'*'}!gx;
+                       $title =~ s!  _ !$g_escape_table{'_'}!gx;
+                       $result .=  " title=\"$title\"";
+               }
+               $result .= $g_empty_element_suffix;
+
+               $result;
+       }xsge;
+
+       return $text;
+}
+
+
+sub _DoHeaders {
+#
+# Convert Setext-style and atx-style Markdown headers into <h1>..<h6>
+# elements. The header text itself is run through _RunSpanGamut().
+#
+# Parameter:  String of block-level text.
+# Returns:    The string with headers converted.
+#
+       my $text = shift;
+
+       # Setext-style headers are a line of text underlined with = (level 1)
+       # or - (level 2):
+       #         Header 1
+       #         ========
+       #
+       #         Header 2
+       #         --------
+       #
+       $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{
+               my $inner = _RunSpanGamut($1);
+               "<h1>" . $inner . "</h1>\n\n";
+       }egmx;
+
+       $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
+               my $inner = _RunSpanGamut($1);
+               "<h2>" . $inner . "</h2>\n\n";
+       }egmx;
+
+
+       # atx-style headers start with one to six #'s; the count is the level:
+       #       # Header 1
+       #       ## Header 2
+       #       ## Header 2 with closing hashes ##
+       #       ...
+       #       ###### Header 6
+       #
+       $text =~ s{
+                       ^(\#{1,6})      # $1 = string of #'s
+                       [ \t]*
+                       (.+?)           # $2 = Header text
+                       [ \t]*
+                       \#*                     # optional closing #'s (not counted)
+                       \n+
+               }{
+                       my $tag   = "h" . length($1);
+                       my $inner = _RunSpanGamut($2);
+                       "<" . $tag . ">" . $inner . "</" . $tag . ">\n\n";
+               }egmx;
+
+       return $text;
+}
+
+
+sub _DoLists {
+#
+# Form HTML ordered (numbered) and unordered (bulleted) lists.
+#
+# Parameter:  String of block-level text.
+# Returns:    The string with every recognised list replaced by a
+#             <ul>...</ul> or <ol>...</ol> element whose items were
+#             produced by _ProcessListItems().
+#
+# The list type is decided by the first item's marker only: a *, + or -
+# gives <ul>, anything else (a number followed by a period) gives <ol>.
+#
+       my $text = shift;
+       my $less_than_tab = $g_tab_width - 1;
+
+       # Re-usable patterns to match list item bullets and number markers:
+       my $marker_ul  = qr/[*+-]/;
+       my $marker_ol  = qr/\d+[.]/;
+       my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
+
+       # Re-usable pattern to match any entire ul or ol list:
+       my $whole_list = qr{
+               (                                                               # $1 = whole list
+                 (                                                             # $2
+                       [ ]{0,$less_than_tab}
+                       (${marker_any})                         # $3 = first list item marker
+                       [ \t]+
+                 )
+                 (?s:.+?)
+                 (                                                             # $4
+                         \z
+                       |
+                         \n{2,}
+                         (?=\S)
+                         (?!                                           # Negative lookahead for another list item marker
+                               [ \t]*
+                               ${marker_any}[ \t]+
+                         )
+                 )
+               )
+       }mx;
+
+       # We use a different prefix before nested lists than top-level lists.
+       # See extended comment in _ProcessListItems().
+       #
+       # Note: There's a bit of duplication here. My original implementation
+       # created a scalar regex pattern as the conditional result of the test on
+       # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
+       # substitution once, using the scalar as the pattern. This worked,
+       # everywhere except when running under MT on my hosting account at Pair
+       # Networks. There, this caused all rebuilds to be killed by the reaper (or
+       # perhaps they crashed, but that seems incredibly unlikely given that the
+       # same script on the same server ran fine *except* under MT). I've spent
+       # more time trying to figure out why this is happening than I'd like to
+       # admit. My only guess, backed up by the fact that this workaround works,
+       # is that Perl optimizes the substitution when it can figure out that the
+       # pattern will never change, and when this optimization isn't on, we run
+       # afoul of the reaper. Thus, the slightly redundant code that uses two
+       # static s/// patterns rather than one conditional pattern.
+
+       if ($g_list_level) {
+               # Already inside a list: a (sub-)list may begin at the start of
+               # any line.
+               $text =~ s{
+                               ^
+                               $whole_list
+                       }{
+                               my $list = $1;
+                               my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
+                               # Turn double returns into triple returns, so that we can make a
+                               # paragraph for the last item in a list, if necessary:
+                               $list =~ s/\n{2,}/\n\n\n/g;
+                               my $result = _ProcessListItems($list, $marker_any);
+                               $result = "<$list_type>\n" . $result . "</$list_type>\n";
+                               $result;
+                       }egmx;
+       }
+       else {
+               # Top level: a list must follow a blank line or sit at the very
+               # start of the text (optionally after one leading newline).
+               $text =~ s{
+                               (?:(?<=\n\n)|\A\n?)
+                               $whole_list
+                       }{
+                               my $list = $1;
+                               my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
+                               # Turn double returns into triple returns, so that we can make a
+                               # paragraph for the last item in a list, if necessary:
+                               $list =~ s/\n{2,}/\n\n\n/g;
+                               my $result = _ProcessListItems($list, $marker_any);
+                               $result = "<$list_type>\n" . $result . "</$list_type>\n";
+                               $result;
+                       }egmx;
+       }
+
+
+       return $text;
+}
+
+
+sub _ProcessListItems {
+#
+#      Process the contents of a single ordered or unordered list, splitting it
+#      into individual list items.
+#
+#      Parameters:
+#              $list_str   - text of one whole list (markers included)
+#              $marker_any - compiled pattern matching any list item marker
+#
+#      Returns:    the list text with every item rewritten as
+#                  "<li>...</li>\n". The enclosing <ul>/<ol> tags are NOT
+#                  added here.
+#
+#      Side effect: $g_list_level is incremented for the duration of the call
+#                  and decremented again before returning.
+#
+
+       my $list_str = shift;
+       my $marker_any = shift;
+
+
+       # The $g_list_level global keeps track of when we're inside a list.
+       # Each time we enter a list, we increment it; when we leave a list,
+       # we decrement. If it's zero, we're not in a list anymore.
+       #
+       # We do this because when we're not inside a list, we want to treat
+       # something like this:
+       #
+       #               I recommend upgrading to version
+       #               8. Oops, now this line is treated
+       #               as a sub-list.
+       #
+       # As a single paragraph, despite the fact that the second line starts
+       # with a digit-period-space sequence.
+       #
+       # Whereas when we're inside a list (or sub-list), that line will be
+       # treated as the start of a sub-list. What a kludge, huh? This is
+       # an aspect of Markdown's syntax that's hard to parse perfectly
+       # without resorting to mind-reading. Perhaps the solution is to
+       # change the syntax rules such that sub-lists must start with a
+       # starting cardinal number; e.g. "1." or "a.".
+
+       $g_list_level++;
+
+       # trim trailing blank lines:
+       $list_str =~ s/\n{2,}\z/\n/;
+
+
+       $list_str =~ s{
+               (\n)?                                                   # leading line = $1
+               (^[ \t]*)                                               # leading whitespace = $2
+               ($marker_any) [ \t]+                    # list marker = $3
+               ((?s:.+?)                                               # list item text   = $4
+               (\n{1,2}))
+               (?= \n* (\z | \2 ($marker_any) [ \t]+))
+       }{
+               my $item = $4;
+               my $leading_line = $1;
+               my $leading_space = $2;
+
+               # An item preceded by a blank line, or containing one, is treated
+               # as block-level content; otherwise it is a single span-level run
+               # that may still contain nested lists.
+               if ($leading_line or ($item =~ m/\n{2,}/)) {
+                       $item = _RunBlockGamut(_Outdent($item));
+               }
+               else {
+                       # Recursion for sub-lists:
+                       $item = _DoLists(_Outdent($item));
+                       chomp $item;
+                       $item = _RunSpanGamut($item);
+               }
+
+               "<li>" . $item . "</li>\n";
+       }egmx;
+
+       $g_list_level--;
+       return $list_str;
+}
+
+
+
+sub _DoCodeBlocks {
+#
+#      Turn indented Markdown code blocks into `<pre><code>` blocks.
+#
+#      Parameter:  String of block-level text.
+#      Returns:    The string with each run of lines indented by a tab or a
+#                  tab-width of spaces replaced by a <pre><code> element
+#                  whose content was outdented, encoded and detabbed.
+#
+
+       my $text = shift;
+
+       $text =~ s{
+                       (?:\n\n|\A)
+                       (                   # $1 = the code block -- one or more lines, starting with a space/tab
+                         (?:
+                           (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
+                           .*\n+
+                         )+
+                       )
+                       ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+               }{
+                       # Outdent first, then encode, then expand tabs.
+                       my $code = _Detab(_EncodeCode(_Outdent($1)));
+
+                       $code =~ s/\A\n+//; # trim leading newlines
+                       $code =~ s/\s+\z//; # trim trailing whitespace
+
+                       "\n\n<pre><code>" . $code . "\n</code></pre>\n\n";
+               }egmx;
+
+       return $text;
+}
+
+
+sub _DoCodeSpans {
+#
+#      Convert backtick-quoted runs into <code></code> spans.
+#
+#      *       The delimiter is a run of one or more backticks, and the span
+#              is closed by a run of the same length. Use a longer delimiter
+#              to include literal backticks in the code span. So, this input:
+#
+#         Just type ``foo `bar` baz`` at the prompt.
+#
+#      Will translate to:
+#
+#         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
+#
+#              There's no arbitrary limit to the number of backticks you
+#              can use as delimiters. If you need three consecutive backticks
+#              in your code, use four for delimiters, etc.
+#
+#      *       Spaces and tabs at the edges of the span are dropped, which
+#              lets you put literal backticks at the edges:
+#
+#         ... type `` `bar` `` ...
+#
+#      Turns to:
+#
+#         ... type <code>`bar`</code> ...
+#
+
+       my $text = shift;
+
+       $text =~ s{
+                       (`+)            # $1 = Opening run of `
+                       (.+?)           # $2 = The code block
+                       (?<!`)
+                       \1                      # Matching closer
+                       (?!`)
+               }{
+                       my $code = "$2";
+                       $code =~ s/^[ \t]*//g; # leading whitespace
+                       $code =~ s/[ \t]*$//g; # trailing whitespace
+                       "<code>" . _EncodeCode($code) . "</code>";
+               }egsx;
+
+       return $text;
+}
+
+
+sub _EncodeCode {
+#
+# Escape the characters that must be literal inside a Markdown code run,
+# so that they lose both their HTML and their Markdown meaning.
+#
+# Parameter:  String of code.
+# Returns:    The encoded string.
+#
+       my $code = shift;
+
+       # Every ampersand is encoded: HTML entities are not entities
+       # within a Markdown code span.
+       $code =~ s/&/&amp;/g;
+
+       # Dollar signs are encoded only when running under Blosxom, which
+       # interpolates Perl variables in article bodies.
+       {
+               no warnings 'once';
+               $code =~ s/\$/&#036;/g if defined($blosxom::version);
+       }
+
+       # Angle brackets:
+       $code =~ s! <  !&lt;!gx;
+       $code =~ s! >  !&gt;!gx;
+
+       # Finally the characters that are magic in Markdown, each replaced by
+       # its %g_escape_table value:
+       foreach my $magic ('*', '_', '{', '}', '[', ']', '\\') {
+               $code =~ s/\Q$magic\E/$g_escape_table{$magic}/g;
+       }
+
+       return $code;
+}
+
+
+sub _DoItalicsAndBold {
+#
+# Convert **text** / __text__ to <strong> and *text* / _text_ to <em>.
+#
+# Parameter:  String of span-level text.
+# Returns:    The converted string.
+#
+       my $text = shift;
+
+       for ($text) {
+               # <strong> must go first, otherwise the doubled delimiters
+               # would be consumed as two <em> delimiters.
+               s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
+                       {<strong>$2</strong>}gsx;
+
+               s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
+                       {<em>$2</em>}gsx;
+       }
+
+       return $text;
+}
+
+
+sub _DoBlockQuotes {
+#
+# Convert email-style quoted text (lines starting with '>') into
+# <blockquote> elements.
+#
+# Parameter:  String of block-level text.
+# Returns:    The string with each quoted run replaced by a <blockquote>
+#             whose content has had one level of quoting removed and has
+#             been processed recursively by _RunBlockGamut().
+#
+       my $text = shift;
+
+       $text =~ s{
+                 (                                                             # Wrap whole match in $1
+                       (
+                         ^[ \t]*>[ \t]?                        # '>' at the start of a line
+                           .+\n                                        # rest of the first line
+                         (.+\n)*                                       # subsequent consecutive lines
+                         \n*                                           # blanks
+                       )+
+                 )
+               }{
+                       my $bq = $1;
+                       $bq =~ s/^[ \t]*>[ \t]?//gm;    # trim one level of quoting
+                       $bq =~ s/^[ \t]+$//mg;                  # trim whitespace-only lines
+                       $bq = _RunBlockGamut($bq);              # recurse
+
+                       # Indent every line of the quoted content by two spaces.
+                       # The /m flag is essential: without it ^ matches only at
+                       # the start of the string, so just the first line would be
+                       # indented while the <pre> fix-up below still strips two
+                       # spaces from every <pre> line, eating real indentation.
+                       $bq =~ s/^/  /mg;
+                       # These leading spaces screw with <pre> content, so we need to fix that:
+                       $bq =~ s{
+                                       (\s*<pre>.+?</pre>)
+                               }{
+                                       my $pre = $1;
+                                       $pre =~ s/^  //mg;
+                                       $pre;
+                               }egsx;
+
+                       "<blockquote>\n$bq\n</blockquote>\n\n";
+               }egmx;
+
+
+       return $text;
+}
+
+
+sub _FormParagraphs {
+#
+#      Wrap each paragraph of the text in <p> tags and swap hashed HTML
+#      blocks back in.
+#
+#      Params:
+#              $text - string to process with html <p> tags
+#
+#      Returns:    the paragraphs, joined by one blank line.
+#
+       my $text = shift;
+
+       # Strip leading and trailing lines:
+       $text =~ s/\A\n+//;
+       $text =~ s/\n+\z//;
+
+       #
+       # First pass: wrap <p> tags around every chunk that is not a key of
+       # %g_html_blocks.
+       #
+       my @wrapped;
+       foreach my $piece (split(/\n{2,}/, $text)) {
+               my $graf = $piece;
+               if (!defined( $g_html_blocks{$graf} )) {
+                       $graf = _RunSpanGamut($graf);
+                       $graf =~ s/^([ \t]*)/<p>/;
+                       $graf .= "</p>";
+               }
+               push @wrapped, $graf;
+       }
+
+       #
+       # Second pass: unhashify HTML blocks.
+       #
+       my @grafs = map {
+               defined( $g_html_blocks{$_} ) ? $g_html_blocks{$_} : $_
+       } @wrapped;
+
+       return join "\n\n", @grafs;
+}
+
+
+sub _EncodeAmpsAndAngles {
+#
+# Encode only those ampersands and left angle brackets that are not
+# already part of an entity or a tag.
+#
+# Parameter:  String of span-level text.
+# Returns:    The encoded string.
+#
+       my $text = shift;
+
+       for ($text) {
+               # An ampersand that does not begin something shaped like an
+               # entity becomes &amp;. Based entirely on Nat Irons's Amputator
+               # MT plugin:
+               #   http://bumppo.net/projects/amputator/
+               s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
+
+               # A naked < (one not followed by a letter, /, ?, $ or !)
+               # becomes &lt;.
+               s{<(?![a-z/?\$!])}{&lt;}gi;
+       }
+
+       return $text;
+}
+
+
+sub _EncodeBackslashEscapes {
+#
+#   Parameter:  String.
+#   Returns:    The string, after replacing each supported backslash
+#               escape sequence (a backslash followed by one of the
+#               characters listed below) with that character's
+#               %g_escape_table value.
+#
+       my $text = shift;
+
+       # The backslash itself must stay first in this list: escaped
+       # backslashes have to be processed before any other sequence.
+       my @escapable = (
+               '\\', '`', '*', '_', '{', '}', '[', ']',
+               '(', ')', '>', '#', '+', '-', '.', '!',
+       );
+
+       foreach my $char (@escapable) {
+               $text =~ s/\\\Q$char\E/$g_escape_table{$char}/g;
+       }
+
+       return $text;
+}
+
+
+sub _DoAutoLinks {
+#
+# Turn <http://...>, <https://...>, <ftp://...> and <address@domain.foo>
+# into links.
+#
+# Parameter:  String of span-level text.
+# Returns:    The string with automatic links converted to <a> elements.
+#             Email addresses are obfuscated by _EncodeEmailAddress().
+#
+       my $text = shift;
+
+       for ($text) {
+               # URLs: <scheme:...>, used both as the href and as the link text.
+               s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
+
+               # Email addresses: <address@domain.foo>
+               s{
+               <
+        (?:mailto:)?
+               (
+                       [-.\w]+
+                       \@
+                       [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
+               )
+               >
+               }{
+                       my $address = _UnescapeSpecialChars($1);
+                       _EncodeEmailAddress($address);
+               }egix;
+       }
+
+       return $text;
+}
+
+
+sub _EncodeEmailAddress {
+#
+#      Input: an email address, e.g. "foo@example.com"
+#
+#      Output: the email address as a mailto link, with each character
+#              of the address encoded as either a decimal or hex entity, in
+#              the hopes of foiling most address harvesting spam bots. E.g.:
+#
+#        <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
+#       x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
+#       &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
+#
+#      The encoding is random, so two calls with the same address normally
+#      return different (but equivalent) markup.
+#
+#      Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
+#      mailing list: <http://tinyurl.com/yu7ue>
+#
+
+       my $addr = shift;
+
+       # No explicit srand here: rand() seeds the generator itself on first
+       # use, and re-seeding on every call is discouraged by perlfunc.
+       my @encode = (
+               sub { '&#' .                 ord(shift)   . ';' },   # decimal entity
+               sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },   # hex entity
+               sub {                            shift          },   # raw character
+       );
+
+       $addr = "mailto:" . $addr;
+
+       $addr =~ s{(.)}{
+               my $char = $1;
+               if ( $char eq '@' ) {
+                       # this *must* be encoded. I insist.
+                       # "int rand 2" picks the decimal or the hex encoder;
+                       # "int rand 1" would always be 0 and never choose hex.
+                       $char = $encode[int rand 2]->($char);
+               } elsif ( $char ne ':' ) {
+                       # leave ':' alone (to spot mailto: later)
+                       my $r = rand;
+                       # roughly 10% raw, 45% hex, 45% dec
+                       $char = (
+                               $r > .9   ?  $encode[2]->($char)  :
+                               $r < .45  ?  $encode[1]->($char)  :
+                                                        $encode[0]->($char)
+                       );
+               }
+               $char;
+       }gex;
+
+       $addr = qq{<a href="$addr">$addr</a>};
+       $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
+
+       return $addr;
+}
+
+
+sub _UnescapeSpecialChars {
+#
+# Swap back in all the special characters we've hidden.
+#
+# Parameter:  String in which special characters were replaced by their
+#             %g_escape_table values.
+# Returns:    The string with every such value turned back into the
+#             character it stands for.
+#
+       my $text = shift;
+
+       # Iterate with keys() rather than each(): each() shares one iterator
+       # per hash, so a previously interrupted traversal of %g_escape_table
+       # would make this loop silently skip entries.
+       foreach my $char (keys %g_escape_table) {
+               my $hash = $g_escape_table{$char};
+               # \Q...\E: the stored value is matched literally, never as a pattern.
+               $text =~ s/\Q$hash\E/$char/g;
+       }
+       return $text;
+}
+
+
+sub _TokenizeHTML {
+#
+#   Parameter:  String containing HTML markup.
+#   Returns:    Reference to an array of the tokens comprising the input
+#               string. Each token is either a tag (possibly with nested
+#               tags contained therein, such as <a href="<MTFoo>">), or a
+#               run of text between tags. Each element of the array is a
+#               two-element array; the first is either 'tag' or 'text';
+#               the second is the actual value.
+#
+#               Comments (<!-- ... -->) and processing instructions
+#               (<? ... ?>) are returned as 'tag' tokens too.
+#
+#   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
+#       <http://www.bradchoate.com/past/mtregex.php>
+#
+
+    my $str = shift;
+    my @tokens;
+    my $cursor = 0;     # offset just past the last tag consumed so far
+
+    # Tags nested up to $depth levels deep are recognised as one tag.
+    my $depth = 6;
+    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x  $depth);
+    my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
+                   (?s: <\? .*? \?> ) |              # processing instruction
+                   $nested_tags/ix;                   # nested tags
+
+    while ($str =~ m/($match)/g) {
+        my $tag       = $1;
+        my $tag_start = $-[1];
+        my $tag_end   = $+[1];
+
+        # Any text between the previous tag and this one comes first.
+        push @tokens, ['text', substr($str, $cursor, $tag_start - $cursor)]
+            if $cursor < $tag_start;
+
+        push @tokens, ['tag', $tag];
+        $cursor = $tag_end;
+    }
+
+    # Whatever follows the last tag is one final text token.
+    my $len = length $str;
+    if ($cursor < $len) {
+        push @tokens, ['text', substr($str, $cursor, $len - $cursor)];
+    }
+
+    return \@tokens;
+}
+
+
+sub _Outdent {
+#
+# Strip one level of indentation from the start of every line: either
+# one tab, or between one and $g_tab_width spaces.
+#
+# Parameter:  String.
+# Returns:    The outdented string.
+#
+       (my $outdented = shift) =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
+
+       return $outdented;
+}
+
+
+sub _Detab {
+#
+# Expand each tab into the number of spaces needed to reach the next
+# multiple of $g_tab_width, counted from the text that precedes the tab.
+#
+# Cribbed from a post by Bart Lateur:
+# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
+#
+       my $text = shift;
+
+       $text =~ s{(.*?)\t}{
+               my $lead = $1;
+               my $pad  = $g_tab_width - length($lead) % $g_tab_width;
+               $lead . (' ' x $pad);
+       }ge;
+
+       return $text;
+}
+
+
+1;
+
+__END__
+
+
+=pod
+
+=head1 NAME
+
+B<Markdown>
+
+
+=head1 SYNOPSIS
+
+B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<--shortversion> ]
+    [ I<file> ... ]
+
+
+=head1 DESCRIPTION
+
+Markdown is a text-to-HTML filter; it translates an easy-to-read /
+easy-to-write structured text format into HTML. Markdown's text format
+is most similar to that of plain text email, and supports features such
+as headers, *emphasis*, code blocks, blockquotes, and links.
+
+Markdown's syntax is designed not as a generic markup language, but
+specifically to serve as a front-end to (X)HTML. You can use span-level
+HTML tags anywhere in a Markdown document, and you can use block-level
+HTML tags (like <div> and <table>) as well.
+
+For more information about Markdown's syntax, see:
+
+    http://daringfireball.net/projects/markdown/
+
+
+=head1 OPTIONS
+
+Use "--" to end switch parsing. For example, to open a file named "-z", use:
+
+       Markdown.pl -- -z
+
+=over 4
+
+
+=item B<--html4tags>
+
+Use HTML 4 style for empty element tags, e.g.:
+
+    <br>
+
+instead of Markdown's default XHTML style tags, e.g.:
+
+    <br />
+
+
+=item B<-v>, B<--version>
+
+Display Markdown's version number and copyright information.
+
+
+=item B<-s>, B<--shortversion>
+
+Display the short-form version number.
+
+
+=back
+
+
+
+=head1 BUGS
+
+To file bug reports or feature requests (other than topics listed in the
+Caveats section above) please send email to:
+
+    support@daringfireball.net
+
+Please include with your report: (1) the example input; (2) the output
+you expected; (3) the output Markdown actually produced.
+
+
+=head1 VERSION HISTORY
+
+See the readme file for detailed release notes for this version.
+
+1.0.1 - 14 Dec 2004
+
+1.0 - 28 Aug 2004
+
+
+=head1 AUTHOR
+
+    John Gruber
+    http://daringfireball.net
+
+    PHP port and other contributions by Michel Fortin
+    http://michelf.com
+
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2003-2004 John Gruber   
+<http://daringfireball.net/>   
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name "Markdown" nor the names of its contributors may
+  be used to endorse or promote products derived from this software
+  without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as
+is" and any express or implied warranties, including, but not limited
+to, the implied warranties of merchantability and fitness for a
+particular purpose are disclaimed. In no event shall the copyright owner
+or contributors be liable for any direct, indirect, incidental, special,
+exemplary, or consequential damages (including, but not limited to,
+procurement of substitute goods or services; loss of use, data, or
+profits; or business interruption) however caused and on any theory of
+liability, whether in contract, strict liability, or tort (including
+negligence or otherwise) arising in any way out of the use of this
+software, even if advised of the possibility of such damage.
+
+=cut
diff --git a/filters/html-converters/resources/rst-template.txt b/filters/html-converters/resources/rst-template.txt

new file mode 100644 (file)

index 0000000..43cde42
--- /dev/null
+++ b/filters/html-converters/resources/rst-template.txt
@@ -0,0 +1,4 @@
+%(stylesheet)s
+%(body_pre_docinfo)s
+%(docinfo)s
+%(body)s
diff --git a/filters/html-converters/rst2html b/filters/html-converters/rst2html

new file mode 100755 (executable)

index 0000000..c51f5be
--- /dev/null
+++ b/filters/html-converters/rst2html
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Run rst2html.py with the template stored in the resources/ directory next
+# to this script. "$0" is quoted so the path still resolves when the install
+# directory contains spaces.
+rst2html.py --template="$(dirname "$0")/resources/rst-template.txt"
diff --git a/filters/html-converters/txt2html b/filters/html-converters/txt2html

new file mode 100755 (executable)

index 0000000..a795995
--- /dev/null
+++ b/filters/html-converters/txt2html
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Wrap the text read from stdin in a <pre> block for display as HTML.
+# The input is arbitrary text, so the characters that are significant in
+# HTML are escaped; "&" is replaced first so that the entities produced for
+# "<" and ">" are not escaped a second time.
+echo "<pre>"
+sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
+echo "</pre>"
diff --git a/filters/syntax-highlighting.py b/filters/syntax-highlighting.py

new file mode 100755 (executable)

index 0000000..dcdba03
--- /dev/null
+++ b/filters/syntax-highlighting.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+# This script uses Pygments and Python3. You must have both installed for this to work.
+# http://pygments.org/
+# http://python.org/
+#
+# It may be used with the source-filter or repo.source-filter settings in cgitrc.
+#
+# The following environment variables can be used to retrieve the configuration
+# of the repository for which this script is called:
+# CGIT_REPO_URL        ( = repo.url       setting )
+# CGIT_REPO_NAME       ( = repo.name      setting )
+# CGIT_REPO_PATH       ( = repo.path      setting )
+# CGIT_REPO_OWNER      ( = repo.owner     setting )
+# CGIT_REPO_DEFBRANCH  ( = repo.defbranch setting )
+# CGIT_REPO_SECTION    ( = section        setting )
+# CGIT_REPO_CLONE_URL  ( = repo.clone-url setting )
+
+
+import sys
+import html
+import codecs
+from pygments.lexers import get_lexer_for_filename
+from pygments import highlight
+from pygments.formatters import HtmlFormatter
+
+# Decode stdin as UTF-8, substituting U+FFFD for undecodable bytes so that a
+# blob in another encoding does not abort the filter before any output.
+sys.stdin = codecs.getreader("utf-8")(sys.stdin.detach(), errors="replace")
+doc = sys.stdin.read()
+try:
+       # The file name passed as the first argument selects the lexer.
+       lexer = get_lexer_for_filename(sys.argv[1])
+       formatter = HtmlFormatter(style='pastie')
+       # Build the whole fragment first and emit it with a single write, so an
+       # error raised while highlighting does not leave a dangling <style>
+       # block ahead of the fallback output below.
+       style = formatter.get_style_defs('.highlight')
+       markup = highlight(doc, lexer, formatter)
+       sys.stdout.write("<style>" + style + "</style>" + markup)
+except Exception:
+       # Best-effort fallback for any failure above (e.g. no lexer matches the
+       # file name): emit the text HTML-escaped, with non-ASCII characters as
+       # numeric character references. quote=False keeps the output identical
+       # to the cgi.escape() call this replaces; cgi was removed in Python 3.13.
+       sys.stdout.write(str(html.escape(doc, quote=False).encode("ascii", "xmlcharrefreplace"), "ascii"))
author	Jason A. Donenfeld <Jason@zx2c4.com>
	Mon, 27 May 2013 19:39:43 +0000 (21:39 +0200)
committer	Jason A. Donenfeld <Jason@zx2c4.com>
	Mon, 27 May 2013 19:54:16 +0000 (21:54 +0200)
Makefile		patch \| blob \| history
cgit.css		patch \| blob \| history
filters/about-formatting.sh	[new file with mode: 0755]	patch \| blob
filters/html-converters/man2html	[new file with mode: 0755]	patch \| blob
filters/html-converters/md2html	[new file with mode: 0755]	patch \| blob
filters/html-converters/resources/markdown.pl	[new file with mode: 0755]	patch \| blob
filters/html-converters/resources/rst-template.txt	[new file with mode: 0644]	patch \| blob
filters/html-converters/rst2html	[new file with mode: 0755]	patch \| blob
filters/html-converters/txt2html	[new file with mode: 0755]	patch \| blob
filters/syntax-highlighting.py	[new file with mode: 0755]	patch \| blob