]> granicus.if.org Git - clang/commitdiff
Add python tool to dump and construct header maps
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>
Wed, 20 Jun 2018 21:16:37 +0000 (21:16 +0000)
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>
Wed, 20 Jun 2018 21:16:37 +0000 (21:16 +0000)
Header maps are binary files used by Xcode, which are used to map
header names or paths to other locations. Clang has support for
those since its inception, but there's not a lot of header map
testing around.

Since it's a binary format, testing is quite brittle, and it's
hard to even know what's inside a header map if you don't have
the appropriate tools.

Add a Python-based tool that allows creating and dumping header
maps based on a JSON description of them. While here, rewrite
tests to use the tool and remove the binary files from the tree.

This tool was initially written by Daniel Dunbar.

Differential Revision: https://reviews.llvm.org/D46485

rdar://problem/39994722

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@335177 91177308-0d34-0410-b5e6-96231b3b80d8

14 files changed:
CMakeLists.txt
test/CMakeLists.txt
test/Modules/crash-vfs-headermaps.m
test/Preprocessor/Inputs/headermap-rel/foo.hmap [deleted file]
test/Preprocessor/Inputs/headermap-rel/foo.hmap.json [new file with mode: 0644]
test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap [deleted file]
test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap.json [new file with mode: 0644]
test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap [deleted file]
test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap.json [new file with mode: 0644]
test/Preprocessor/headermap-rel.c
test/Preprocessor/headermap-rel2.c
test/Preprocessor/nonportable-include-with-hmap.c
utils/hmaptool/CMakeLists.txt [new file with mode: 0644]
utils/hmaptool/hmaptool [new file with mode: 0755]

index ab81ec34b00d05f60196c20584be959187566482..ae8835d751c0c59cecc9140ef7ab929ed0c3e60b 100644 (file)
@@ -753,6 +753,7 @@ endif()
 if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION)
   add_subdirectory(utils/ClangVisualizers)
 endif()
+add_subdirectory(utils/hmaptool)
 
 configure_file(
   ${CLANG_SOURCE_DIR}/include/clang/Config/config.h.cmake
index 8efe3600c54079b72db4fd482a8011d8c9d36863..8d6db5348e08c802279c4af5523b9a330fcb4c66 100644 (file)
@@ -54,6 +54,7 @@ list(APPEND CLANG_TEST_DEPS
   clang-rename
   clang-refactor
   clang-diff
+  hmaptool
   )
   
 if(CLANG_ENABLE_STATIC_ANALYZER)
index 4f88f3ba1197cb959093ea18b590963f710a0db0..d3353460280085af5aa89c2b2ed2631094d36622 100644 (file)
@@ -1,15 +1,9 @@
 // REQUIRES: crash-recovery, shell, system-darwin
 
-// This uses a headermap with this entry:
-//   Foo.h -> Foo/Foo.h
-
-// Copy out the headermap from test/Preprocessor/Inputs/headermap-rel and avoid
-// adding another binary format to the repository.
-
 // RUN: rm -rf %t
-// RUN: mkdir -p %t/m
-// RUN: cp -a %S/../Preprocessor/Inputs/headermap-rel %t/i
+// RUN: mkdir -p %t/m %t/i/Foo.framework/Headers
 // RUN: echo '// Foo.h' > %t/i/Foo.framework/Headers/Foo.h
+// RUN: hmaptool write %S/../Preprocessor/Inputs/headermap-rel/foo.hmap.json %t/i/foo.hmap
 
 // RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \
 // RUN: %clang -fsyntax-only -fmodules -fmodules-cache-path=%t/m %s \
diff --git a/test/Preprocessor/Inputs/headermap-rel/foo.hmap b/test/Preprocessor/Inputs/headermap-rel/foo.hmap
deleted file mode 100644 (file)
index 783c64e..0000000
Binary files a/test/Preprocessor/Inputs/headermap-rel/foo.hmap and /dev/null differ
diff --git a/test/Preprocessor/Inputs/headermap-rel/foo.hmap.json b/test/Preprocessor/Inputs/headermap-rel/foo.hmap.json
new file mode 100644 (file)
index 0000000..ccfd911
--- /dev/null
@@ -0,0 +1,6 @@
+{
+  "mappings" :
+    {
+     "Foo.h" : "Foo/Foo.h"
+    }
+}
diff --git a/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap b/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap
deleted file mode 100644 (file)
index a0770fb..0000000
Binary files a/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap and /dev/null differ
diff --git a/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap.json b/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap.json
new file mode 100644 (file)
index 0000000..e03703b
--- /dev/null
@@ -0,0 +1,6 @@
+{
+  "mappings" :
+    {
+     "someheader.h" : "Product/someheader.h"
+    }
+}
diff --git a/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap b/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap
deleted file mode 100644 (file)
index 9036f20..0000000
Binary files a/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap and /dev/null differ
diff --git a/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap.json b/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap.json
new file mode 100644 (file)
index 0000000..c69f1df
--- /dev/null
@@ -0,0 +1,6 @@
+{
+  "mappings" :
+    {
+     "Foo/Foo.h" : "headers/foo/Foo.h"
+    }
+}
index 38500a70f69717790727b90ff717a16c93c18310..91f0d957e5a647350da4edc8d3902089fa8f729f 100644 (file)
@@ -1,8 +1,6 @@
-
-// This uses a headermap with this entry:
-//   Foo.h -> Foo/Foo.h
-
-// RUN: %clang_cc1 -E %s -o %t.i -I %S/Inputs/headermap-rel/foo.hmap -F %S/Inputs/headermap-rel
+// RUN: rm -f %t.hmap
+// RUN: hmaptool write %S/Inputs/headermap-rel/foo.hmap.json %t.hmap
+// RUN: %clang_cc1 -E %s -o %t.i -I %t.hmap -F %S/Inputs/headermap-rel
 // RUN: FileCheck %s -input-file %t.i
 
 // CHECK: Foo.h is parsed
index d61f3385b22d1c0e4a3edcde413cf8b2481fb488..172821efa4251ca68cc615ce2a63f7e9c4b8a4f6 100644 (file)
@@ -1,8 +1,7 @@
-// This uses a headermap with this entry:
-//   someheader.h -> Product/someheader.h
-
-// RUN: %clang_cc1 -v -fsyntax-only %s -iquote %S/Inputs/headermap-rel2/project-headers.hmap -isystem %S/Inputs/headermap-rel2/system/usr/include -I %S/Inputs/headermap-rel2 -H
-// RUN: %clang_cc1 -fsyntax-only %s -iquote %S/Inputs/headermap-rel2/project-headers.hmap -isystem %S/Inputs/headermap-rel2/system/usr/include -I %S/Inputs/headermap-rel2 -H 2> %t.out
+// RUN: rm -f %t.hmap
+// RUN: hmaptool write %S/Inputs/headermap-rel2/project-headers.hmap.json %t.hmap
+// RUN: %clang_cc1 -v -fsyntax-only %s -iquote %t.hmap -isystem %S/Inputs/headermap-rel2/system/usr/include -I %S/Inputs/headermap-rel2 -H
+// RUN: %clang_cc1 -fsyntax-only %s -iquote %t.hmap -isystem %S/Inputs/headermap-rel2/system/usr/include -I %S/Inputs/headermap-rel2 -H 2> %t.out
 // RUN: FileCheck %s -input-file %t.out
 
 // CHECK: Product/someheader.h
index fc958e7e5e3f03d007f2ab8f0c24de5d0cb570a4..0190d13498a2d4742e30ed7e29038a88ab3fb54c 100644 (file)
@@ -1,5 +1,7 @@
+// RUN: rm -f %t.hmap
+// RUN: hmaptool write %S/Inputs/nonportable-hmaps/foo.hmap.json %t.hmap
 // RUN: %clang_cc1 -Eonly                        \
-// RUN:   -I%S/Inputs/nonportable-hmaps/foo.hmap \
+// RUN:   -I%t.hmap \
 // RUN:   -I%S/Inputs/nonportable-hmaps          \
 // RUN:   %s -verify
 //
diff --git a/utils/hmaptool/CMakeLists.txt b/utils/hmaptool/CMakeLists.txt
new file mode 100644 (file)
index 0000000..f5cc7d8
--- /dev/null
@@ -0,0 +1,16 @@
+# Name of the hmaptool script in this directory; also the name of the copy
+# placed in the build tree and of the custom target below.
+set(CLANG_HMAPTOOL hmaptool)
+
+# Copy the script into <build dir>/bin, creating that directory first. The
+# DEPENDS clause re-runs the copy whenever the source script changes.
+add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/bin/${CLANG_HMAPTOOL}
+                   COMMAND ${CMAKE_COMMAND} -E make_directory
+                     ${CMAKE_BINARY_DIR}/bin
+                   COMMAND ${CMAKE_COMMAND} -E copy
+                     ${CMAKE_CURRENT_SOURCE_DIR}/${CLANG_HMAPTOOL}
+                     ${CMAKE_BINARY_DIR}/bin/
+                   DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${CLANG_HMAPTOOL})
+
+list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${CLANG_HMAPTOOL})
+# Install the script into the "bin" directory of the install tree.
+install(PROGRAMS ${CLANG_HMAPTOOL} DESTINATION bin)
+
+# Target built by default (ALL) whose only job is to trigger the copy above.
+add_custom_target(hmaptool ALL DEPENDS ${Depends})
+set_target_properties(hmaptool PROPERTIES FOLDER "Utils")
+
diff --git a/utils/hmaptool/hmaptool b/utils/hmaptool/hmaptool
new file mode 100755 (executable)
index 0000000..2b1ca74
--- /dev/null
@@ -0,0 +1,296 @@
+#!/usr/bin/env python
+from __future__ import print_function
+
+import json
+import optparse
+import os
+import struct
+import sys
+
+###
+
+# Magic values expected in the first four bytes of a headermap file. The
+# variant that is found selects little-endian ('<') or big-endian ('>')
+# decoding of the rest of the file.
+k_header_magic_LE = 'pamh'
+k_header_magic_BE = 'hmap'
+
+def hmap_hash(str):
+    """hmap_hash(str) -> int
+
+    Compute the headermap hash of a key: each character is lower-cased, its
+    code point is multiplied by 13, and the products are summed.
+    """
+
+    total = 0
+    for ch in str:
+        total += ord(ch.lower()) * 13
+    return total
+
+class HeaderMap(object):
+    """Parsed contents of a binary headermap file.
+
+    num_entries is the entry count recorded in the file header, buckets is a
+    list of (key_idx, prefix_idx, suffix_idx) string table offsets (a key_idx
+    of 0 marks an unused bucket) and strtable is the raw string table.
+    """
+
+    @staticmethod
+    def frompath(path):
+        """Read the headermap at 'path' and return it as a HeaderMap.
+
+        Raises SystemExit with a diagnostic if the file is malformed.
+        """
+        with open(path, 'rb') as f:
+            # The file is read in binary mode, so compare against the encoded
+            # magic; on Python 3 bytes never compare equal to str.
+            magic = f.read(4)
+            if magic == k_header_magic_LE.encode():
+                endian_code = '<'
+            elif magic == k_header_magic_BE.encode():
+                endian_code = '>'
+            else:
+                raise SystemExit("error: %s: not a headermap" % (
+                        path,))
+
+            # Read the header information.
+            header_fmt = endian_code + 'HHIIII'
+            header_size = struct.calcsize(header_fmt)
+            data = f.read(header_size)
+            if len(data) != header_size:
+                raise SystemExit("error: %s: truncated headermap header" % (
+                        path,))
+
+            (version, reserved, strtable_offset, num_entries,
+             num_buckets, max_value_len) = struct.unpack(header_fmt, data)
+
+            if version != 1:
+                raise SystemExit("error: %s: unknown headermap version: %r" % (
+                        path, version))
+            if reserved != 0:
+                raise SystemExit("error: %s: invalid reserved value in header" % (
+                        path,))
+
+            # The number of buckets must be a power of two.
+            if num_buckets == 0 or (num_buckets & num_buckets - 1) != 0:
+                raise SystemExit("error: %s: invalid number of buckets" % (
+                        path,))
+
+            # Read all of the buckets.
+            bucket_fmt = endian_code + 'III'
+            bucket_size = struct.calcsize(bucket_fmt)
+            buckets_data = f.read(num_buckets * bucket_size)
+            if len(buckets_data) != num_buckets * bucket_size:
+                raise SystemExit("error: %s: truncated headermap buckets" % (
+                        path,))
+            buckets = [struct.unpack(bucket_fmt,
+                                     buckets_data[i*bucket_size:(i+1)*bucket_size])
+                       for i in range(num_buckets)]
+
+            # Read the string table; the format doesn't explicitly communicate the
+            # size of the string table, so assume it is the rest of the file.
+            f.seek(0, 2)
+            strtable_size = f.tell() - strtable_offset
+            f.seek(strtable_offset)
+
+            if strtable_size == 0:
+                raise SystemExit("error: %s: unable to read zero-sized string table"%(
+                        path,))
+            strtable = f.read(strtable_size)
+
+            if len(strtable) != strtable_size:
+                raise SystemExit("error: %s: unable to read complete string table"%(
+                        path,))
+            # Slice rather than index: indexing bytes yields an int on
+            # Python 3, which would never compare equal to a NUL string.
+            if strtable[-1:] != b'\0':
+                raise SystemExit("error: %s: invalid string table in headermap" % (
+                        path,))
+
+            return HeaderMap(num_entries, buckets, strtable)
+
+    def __init__(self, num_entries, buckets, strtable):
+        self.num_entries = num_entries
+        self.buckets = buckets
+        self.strtable = strtable
+
+    def get_string(self, idx):
+        """Return the NUL-terminated string that starts at offset 'idx'.
+
+        Raises SystemExit if 'idx' lies outside the string table.
+        """
+        if idx >= len(self.strtable):
+            # The file path is not known here, so report the bad index.
+            raise SystemExit("error: invalid string index: %d" % (
+                    idx,))
+        # Search with a terminator of the same type as the table (bytes when
+        # it was read from a file, possibly str when built by hand).
+        terminator = b'\0' if isinstance(self.strtable, bytes) else '\0'
+        end_idx = self.strtable.index(terminator, idx)
+        value = self.strtable[idx:end_idx]
+        # Hand text back to callers; on Python 3 the slice is bytes.
+        if not isinstance(value, str):
+            value = value.decode('utf-8')
+        return value
+
+    @property
+    def mappings(self):
+        """Yield (key, prefix + suffix) for every occupied bucket."""
+        for key_idx,prefix_idx,suffix_idx in self.buckets:
+            if key_idx == 0:
+                continue
+            yield (self.get_string(key_idx),
+                   self.get_string(prefix_idx) + self.get_string(suffix_idx))
+
+###
+
+def action_dump(name, args):
+    "dump a headermap file"
+
+    # 'name' is the sub-command name (used in the usage string) and 'args'
+    # the remaining command-line arguments. The one-line docstring above is
+    # printed verbatim by usage(), so it is kept short.
+    parser = optparse.OptionParser("%%prog %s [options] <headermap path>" % (
+            name,))
+    parser.add_option("-v", "--verbose", dest="verbose",
+                      help="show more verbose output [%default]",
+                      action="store_true", default=False)
+    (opts, args) = parser.parse_args(args)
+
+    if len(args) != 1:
+        parser.error("invalid number of arguments")
+
+    path, = args
+
+    hmap = HeaderMap.frompath(path)
+
+    # Dump all of the buckets.
+    print ('Header Map: %s' % (path,))
+    if opts.verbose:
+        # The bucket count is not stored on the HeaderMap; derive it from the
+        # bucket list so the hash slot of each key can be shown.
+        num_buckets = len(hmap.buckets)
+
+        print ('headermap: %r' % (path,))
+        print ('  num entries: %d' % (hmap.num_entries,))
+        print ('  num buckets: %d' % (num_buckets,))
+        print ('  string table size: %d' % (len(hmap.strtable),))
+        for i,bucket in enumerate(hmap.buckets):
+            key_idx,prefix_idx,suffix_idx = bucket
+
+            # A key index of 0 marks an empty bucket.
+            if key_idx == 0:
+                continue
+
+            # Get the strings.
+            key = hmap.get_string(key_idx)
+            prefix = hmap.get_string(prefix_idx)
+            suffix = hmap.get_string(suffix_idx)
+
+            # The trailing number is the bucket the key hashes to.
+            print ("  bucket[%d]: %r -> (%r, %r) -- %d" % (
+                i, key, prefix, suffix, (hmap_hash(key) & (num_buckets - 1))))
+    else:
+        mappings = sorted(hmap.mappings)
+        for key,value in mappings:
+            print ("%s -> %s" % (key, value))
+    print ()
+
+def next_power_of_two(value):
+    """Return the smallest power of two that is >= value (1 when value is 0).
+
+    Raises ValueError if value is negative.
+    """
+    if value < 0:
+        raise ValueError("value must be non-negative: %r" % (value,))
+    return 1 if value == 0 else 2**(value - 1).bit_length()
+
+def action_write(name, args):
+    "write a headermap file from a JSON definition"
+
+    # 'name' is the sub-command name (used in the usage string) and 'args'
+    # the remaining command-line arguments. The one-line docstring above is
+    # printed verbatim by usage().
+    usage_text = "%%prog %s [options] <input path> <output path>" % (name,)
+    parser = optparse.OptionParser(usage_text)
+    (opts, args) = parser.parse_args(args)
+
+    if len(args) != 2:
+        parser.error("invalid number of arguments")
+
+    input_path, output_path = args
+
+    with open(input_path, "r") as f:
+        input_data = json.load(f)
+
+    # Size the hash table so that it ends up at most one third full.
+    mappings = input_data['mappings']
+    num_buckets = next_power_of_two(len(mappings) * 3)
+
+    # (0, 0, 0) marks an empty bucket; offset 0 of the string table is a
+    # lone NUL so that no real string ever starts there.
+    table = [(0, 0, 0)] * num_buckets
+    max_value_len = 0
+    strtable = "\0"
+    for key, value in mappings.items():
+        if not isinstance(key, str):
+            key = key.decode('utf-8')
+        if not isinstance(value, str):
+            value = value.decode('utf-8')
+        if len(value) > max_value_len:
+            max_value_len = len(value)
+
+        # Append key, directory prefix and file name to the string table,
+        # remembering where each one starts.
+        offsets = []
+        for piece in (key,
+                      os.path.dirname(value) + '/',
+                      os.path.basename(value)):
+            offsets.append(len(strtable))
+            strtable += piece + '\0'
+        entry = tuple(offsets)
+
+        # Linear probing: take the first free bucket at or after the slot
+        # the key hashes to, wrapping around the table.
+        home = hmap_hash(key)
+        for probe in range(num_buckets):
+            slot = (home + probe) % num_buckets
+            if table[slot][0] != 0:
+                continue
+            table[slot] = entry
+            break
+        else:
+            raise RuntimeError
+
+    # The output is always little-endian.
+    endian_code = '<'
+    magic = k_header_magic_LE
+    magic_size = 4
+    header_struct = struct.Struct(endian_code + 'HHIIII')
+    bucket_struct = struct.Struct(endian_code + 'III')
+    strtable_offset = (magic_size + header_struct.size +
+                       num_buckets * bucket_struct.size)
+
+    # Write out the headermap: magic, header, buckets, then the strings.
+    with open(output_path, 'wb') as f:
+        f.write(magic.encode())
+        f.write(header_struct.pack(1, 0, strtable_offset, len(mappings),
+                                   num_buckets, max_value_len))
+        for bucket in table:
+            f.write(bucket_struct.pack(*bucket))
+        f.write(strtable.encode())
+
+def action_tovfs(name, args):
+    "convert a headermap to a VFS layout"
+
+    # 'name' is the sub-command name (used in the usage string) and 'args'
+    # the remaining command-line arguments. Two positional arguments are
+    # required: the headermap to read and the JSON file to write.
+    parser = optparse.OptionParser(
+        "%%prog %s [options] <headermap path> <output path>" % (
+            name,))
+    parser.add_option("", "--build-path", dest="build_path",
+                      help="build path prefix",
+                      action="store", type=str)
+    (opts, args) = parser.parse_args(args)
+
+    if len(args) != 2:
+        parser.error("invalid number of arguments")
+    if opts.build_path is None:
+        parser.error("--build-path is required")
+
+    input_path,output_path = args
+
+    hmap = HeaderMap.frompath(input_path)
+
+    # Create the table for all the objects: a single root directory named
+    # after the build path, filled in below.
+    vfs = {}
+    vfs['version'] = 0
+    build_dir_contents = []
+    vfs['roots'] = [{
+            'name' : opts.build_path,
+            'type' : 'directory',
+            'contents' : build_dir_contents }]
+
+    # We assume we are mapping framework paths, so a key of "Foo/Bar.h" maps to
+    # "<build path>/Foo.framework/Headers/Bar.h".
+    for key,value in hmap.mappings:
+        # If this isn't a framework style mapping, ignore it.
+        components = key.split('/')
+        if len(components) != 2:
+            continue
+        framework_name,header_name = components
+        build_dir_contents.append({
+                'name' : '%s.framework/Headers/%s' % (framework_name,
+                                                      header_name),
+                'type' : 'file',
+                'external-contents' : value })
+
+    with open(output_path, 'w') as f:
+        json.dump(vfs, f, indent=2)
+
+# Sub-command table: every 'action_<name>' function defined above is
+# registered under '<name>' (prefix stripped, underscores turned into
+# dashes), e.g. action_dump becomes the 'dump' command.
+commands = dict((name[7:].replace("_","-"), f)
+                for name,f in locals().items()
+                if name.startswith('action_'))
+
+def usage():
+    """Print the list of available commands to stderr and exit with status 1."""
+    err = sys.stderr
+    prog = os.path.basename(sys.argv[0])
+
+    print ("Usage: %s command [options]" % (prog,), file=err)
+    print (file=err)
+    print ("Available commands:", file=err)
+
+    # Pad every command name to the width of the longest one so that the
+    # descriptions (the handlers' docstrings) line up.
+    cmds_width = max(len(cmd) for cmd in commands)
+    for name in sorted(commands):
+        func = commands[name]
+        print ("  %-*s - %s" % (cmds_width, name, func.__doc__), file=err)
+    sys.exit(1)
+
+def main():
+    """Run the sub-command named by the first command-line argument."""
+    argv = sys.argv
+    known = len(argv) >= 2 and argv[1] in commands
+    if not known:
+        usage()
+
+    # The handler receives its own name plus the remaining arguments.
+    cmd = argv[1]
+    handler = commands[cmd]
+    handler(cmd, argv[2:])
+
+if __name__ == '__main__':
+    main()