]> granicus.if.org Git - python/commitdiff
Just's script to find non-7-bit-clean characters in a source tree (slightly
authorJack Jansen <jack.jansen@cwi.nl>
Wed, 3 Feb 1999 12:07:14 +0000 (12:07 +0000)
committerJack Jansen <jack.jansen@cwi.nl>
Wed, 3 Feb 1999 12:07:14 +0000 (12:07 +0000)
modified by Jack to skip files that are binary despite the TEXT type).

Mac/scripts/findgremlins.py [new file with mode: 0644]

diff --git a/Mac/scripts/findgremlins.py b/Mac/scripts/findgremlins.py
new file mode 100644 (file)
index 0000000..fe40e64
--- /dev/null
@@ -0,0 +1,53 @@
+"""findgremlins - Search through a folder and subfolders for
+text files that have characters with bit 8 set, and print
+the filename and a bit of context.
+
+By Just, with a little glue by Jack"""
+
+import macfs
+import re
+import os
+import string
+import sys
+
+xpat = re.compile(r"[\200-\377]")  # matches one character with the high bit set (octal 200-377)
+
+def walk(top, recurse=1):
+       if os.path.isdir(top):
+               if recurse:
+                       for name in os.listdir(top):
+                               path = os.path.join(top, name)
+                               walk(path)
+       else:
+               cr, tp = macfs.FSSpec(top).GetCreatorType()
+               if tp == 'TEXT' and top[-4:] <> ".hqx":
+                       data = open(top).read()
+                       badcount = 0
+                       for ch in data[:256]:
+                               if ord(ch) == 0 or ord(ch) >= 0200:
+                                       badcount = badcount + 1
+                       if badcount > 16:
+                               print `top`, 'appears to be a binary file'
+                               return
+                       pos = 0
+                       gotone = 0
+                       while 1:
+                               m = xpat.search(data, pos)
+                               if m is None:
+                                       break
+                               if not gotone:
+                                       print `top`
+                                       gotone = 1
+                               [(i, j)] = m.regs
+                               print "     ", string.replace(data[i-15:j+15], '\n', ' ')
+                               pos = j
+
+def main():
+       fss, ok = macfs.GetDirectory()
+       if ok:
+               walk(fss.as_pathname())
+               
+if __name__ == '__main__':
+       main()
+       sys.exit(1) # Always exit with status 1 so we see the output (NOTE(review): presumably a non-zero status keeps the MacPython output window open - confirm)
+