granicus.if.org Git - zziplib/blob - docs/cpp2markdown-1.py

   1 #! /usr/bin/env python
   2 import pygments.lexers.compiled as lexer
   3 import optparse
   4 import re
   5 from pygments.token import Token
   6 import logging
   7
# Module-wide logger. Handler and level are only configured by the
# script entry point at the bottom of this file.
logg = logging.getLogger(__name__)

# Markers yielded by CppToMarkdown.parse() together with the text they
# describe; CppToMarkdown.process() compares against them to pick the
# markdown section to render.
FileComment = "FileComment"
FileInclude = "FileInclude"
FunctionComment = "FunctionComment"
FunctionPrototype = "FunctionPrototype"
  14
# Use the pygments C lexer to identify elements,
# then filter only those we want. The returned
# token list is more global, flagging the role
# of each token for the manual generation.
  19 class CppToMarkdown:
  20     def __init__(self):
  21         self.alldefinitions = 0
  22         self.internaldefs = ["static"]
  23         self.filecomment_done = ""
  24         self.fileinclude_done = ""
  25         self.filecomment_text = ""
  26         self.fileinclude_text = ""
  27         self.comment_text = ""
  28         self.function_text = ""
  29         self.nesting = 0
  30     def split_copyright(self, text):
  31         # there are two modes - the copyright starts in the first line
  32         # and the source description follows or the other way round.
  33         lines = text.split("\n")
  34         if len(lines) <= 2:
  35             return "", text
  36         introtext = [lines[0]]
  37         copyright = [lines[0]]
  38         check1 = re.compile(r"^\s[*]\s+[(][c][C][)]")
  39         check2 = re.compile(r"^\s[*]\s+\b[Cc]opyright\b")
  40         empty1 = re.compile(r"^\s[*]\s*$")
  41         state = "intro"
  42         for i in xrange(1,len(lines)-1):
  43             line = lines[i]
  44             if state == "intro":
  45                 if empty1.match(line):
  46                     introtext += [ line ]
  47                     continue
  48                 if check1.match(line) or check2.match(line):
  49                     state = "copyrightfirst"
  50                     copyright += [ line ]
  51                 else:
  52                     state = "introtextfirst"
  53                     introtext += [ line ]
  54             elif state == "copyrightfirst":
  55                 if empty1.match(line):
  56                     state = "introtextlast"
  57                     introtext += [ line ]
  58                 else:
  59                     copyright += [ line ]
  60             elif state == "introtextfirst":
  61                 if check1.match(line) or check2.match(line):
  62                     state = "copyrightlast"
  63                     copyright += [ line ]
  64                 else:
  65                     introtext += [ line ]
  66             elif state == "copyrightlast":
  67                 copyright += [ line ]
  68             elif state == "introtextlast":
  69                 introtext += [ line ]
  70             else:
  71                 logg.fatal("UNKNOWN STATE %s", state)
  72         introtext += [lines[-1]]
  73         copyright += [lines[-1]]
  74         logg.debug("@ COPYRIGHT\n %s", copyright)
  75         logg.debug("@ INTROTEXT\n %s", introtext)
  76         return "\n".join(copyright), "\n".join(introtext)
  77     def commentblock(self, text):
  78         prefix = re.compile(r"(?s)^\s*[/][*]+([^\n]*)(?=\n)")
  79         suffix = re.compile(r"(?s)\n [*][/]\s*")
  80         empty = re.compile(r"(?s)\n [*][ \t]*(?=\n)")
  81         lines1 = re.compile(r"(?s)\n [*][ ][\t]")
  82         lines2 = re.compile(r"(?s)\n [*][ ]")
  83         lines3 = re.compile(r"(?s)\n [*][\t][\t]")
  84         lines4 = re.compile(r"(?s)\n [*][\t]")
  85         text = suffix.sub("\n", text)
  86         text = prefix.sub("> \\1\n", text)
  87         text = empty.sub("\n", text)
  88         text = lines1.sub("\n     ", text)
  89         text = lines2.sub("\n", text)
  90         text = lines3.sub("\n         ", text)
  91         text = lines4.sub("\n     ", text)
  92         return text
  93     def functionblock(self, text):
  94         empty = re.compile(r"(?s)\n[ \t]*(?=\n)")
  95         text = "    " + text.replace("\n", "\n    ")
  96         text = empty.sub("", text)
  97         return text
  98     def functionname(self, text):
  99         check1 = re.compile(r"^[^()=]*(\b\w+)\s*[(=]")
 100         found = check1.match(text)
 101         if found:
 102             return found.group(1)
 103         check2 = re.compile(r"^[^()=]*(\b\w+)\s*$")
 104         found = check2.match(text)
 105         if found:
 106             return found.group(1)
 107         return ""
 108     def run(self, filename):
 109         filetext = open(filename).read()
 110         for line in self.process(filetext, filename):
 111             print line
 112     def process(self, filetext, filename=""):
 113         section_ruler = "-----------------------------------------"
 114         copyright = ""
 115         for token, text in self.parse(filetext):
 116             if token == FileInclude:
 117                 yield "## SOURCE " + filename.replace("../", "")
 118                 yield "    #" + text.replace("\n", "\n    ")
 119             elif token == FileComment:
 120                 yield "### INTRODUCTION"
 121                 copyright, introduction = self.split_copyright(text)
 122                 yield self.commentblock(introduction)
 123             elif token == FunctionPrototype:
 124                 name = self.functionname(text)
 125                 yield section_ruler
 126                 yield "### " + name
 127                 yield '<a id="%s"></a>' % name
 128                 yield "#### NAME"
 129                 yield "    " + name
 130                 yield "#### SYNOPSIS"
 131                 yield self.functionblock(text)
 132             elif token == FunctionComment:
 133                 if text:
 134                     yield "#### DESCRIPTION"
 135                     yield self.commentblock(text)
 136             else:
 137                 if text:
 138                     yield "#### NOTES"
 139                     print token, text.replace("\n", "\n  ")
 140         if copyright:
 141             yield section_ruler
 142             yield "### COPYRIGHT"
 143             yield self.commentblock(copyright)
 144     def isexported_function(self):
 145         function = self.function_text.strip().replace("\n"," ")
 146         logg.debug("@ --------------------------------------")
 147         logg.debug("@ ALLDEFINITIONS %s", self.alldefinitions)
 148         if function.startswith("static ") and self.alldefinitions < 3:
 149             logg.debug("@ ONLY INTERNAL %s", function)
 150             return False
 151         if not self.comment_text:
 152             if not self.alldefinitions:
 153                 logg.info("@ NO COMMENT ON %s", function)
 154                 return False
 155             else:
 156                 logg.warn("@ NO COMMENT ON %s", function)
 157         text = self.comment_text
 158         if text.startswith("/**"): return True
 159         if text.startswith("/*!"): return True
 160         if text.startswith("///"): return True
 161         if text.startswith("//!"): return True
 162         if self.alldefinitions >= 1:
 163             if text.startswith("/*"): return True
 164             if text.startswith("//"): return True
 165         if self.alldefinitions >= 2:
 166             return True
 167         logg.debug("@ NO ** COMMENT %s", self.function_text.strip())
 168         defs = self.function_text
 169         return False
 170     def parse(self, filetext):
 171         c = lexer.CLexer()
 172         for token, text in c.get_tokens(filetext):
 173             logg.debug("|| %s %s", token, text.replace("\n", "\n |"))
 174             # completion
 175             if token != Token.Comment.Preproc and self.fileinclude_done == "no":
 176                     yield FileInclude, self.fileinclude_text
 177                     if self.filecomment_text:
 178                         yield FileComment, self.filecomment_text
 179                     self.fileinclude_done = "done"
 180             # parsing
 181             if token == Token.Comment.Multiline:
 182                 if not self.filecomment_done:
 183                     self.filecomment_done = "done"
 184                     self.filecomment_text = text
 185                     # wait until we know it is not a function documentation
 186                     self.comment_text = text
 187                 else:
 188                     self.comment_text = text
 189             elif token == Token.Comment.Preproc and "include" in text:
 190                 if not self.fileinclude_done:
 191                     self.fileinclude_done = "no"
 192                     self.fileinclude_text += text
 193                     self.comment_text = ""
 194             elif token == Token.Comment.Preproc and self.fileinclude_done == "no":
 195                 if not "\n" in self.fileinclude_text:
 196                     self.fileinclude_text += text
 197                 self.comment_text = ""
 198             elif token == Token.Comment.Preproc:
 199                     self.comment_text = ""
 200                     self.function_text = ""
 201             elif token == Token.Operator and text == "=":
 202                 if not self.nesting and self.function_text.strip():
 203                     if self.isexported_function():
 204                         yield FunctionPrototype, self.function_text
 205                         yield FunctionComment, self.comment_text
 206                 self.comment_text = ""
 207                 self.function_text = ""
 208             elif token == Token.Punctuation and text == ";":
 209                 self.comment_text = ""
 210                 self.function_text = ""
 211             elif token == Token.Punctuation and text == "{":
 212                 if not self.nesting and self.function_text.strip():
 213                     if self.isexported_function():
 214                         yield FunctionPrototype, self.function_text
 215                         yield FunctionComment, self.comment_text
 216                 self.comment_text = ""
 217                 self.function_text = ""
 218                 self.nesting += 1
 219             elif token == Token.Punctuation and text == "}":
 220                 self.nesting -= 1
 221                 self.comment_text = ""
 222                 self.function_text = ""
 223             else:
 224                 if not self.nesting:
 225                     self.function_text += text
 226                 else:
 227                     pass # yield "|",text
 228
 229
 230 if __name__ == "__main__":
 231     _o = optparse.OptionParser()
 232     _o.add_option("-v", "--verbose", action="count", default=0)
 233     _o.add_option("-a", "--all", action="count", default=0,
 234                   help="include all definitions in the output (not only /**)")
 235     opt, args = _o.parse_args()
 236
 237     logg.addHandler(logging.StreamHandler())
 238     if opt.verbose:
 239         logg.setLevel(logging.WARN - 10 * opt.verbose)
 240
 241     c = CppToMarkdown()
 242     if opt.all:
 243         c.alldefinitions = opt.all
 244     for arg in args:
 245         c.run(arg)
 246
 247
 248
 249