From 53ff09e06fddfe5909f52784d6a0b9d42d389f43 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Wed, 19 Jul 2017 22:04:56 +0000 Subject: [PATCH] [opt-viewer] Reduce memory consumption The observation is that we have a lot of similar remarks with lots of identical strings (e.g. file paths, text from the remark). Storing a copy of each of those strings in memory is wasteful. This change interns all the strings in a remark, so that each distinct string is stored as a single immutable instance that is referenced everywhere. I get an average 20% heap size reduction with this, but the saving may vary with the typical length of the file paths used. (I used heapy to report the heap size.) Runtime is the same or slightly better. | # of files | 60 | 114 | 308 | 605 | 1370 | | # of remarks | 20K | 37K | 146K | 180K | 640K | | total file size (MB) | 22 | 51 | 219 | 202 | 1034 | |-----------------------+------+------+------+------+------| | Heap size before (MB) | 106 | 226 | 894 | 934 | 3573 | | Heap size after | 86 | 179 | 694 | 739 | 2798 | | Rate | 0.81 | 0.79 | 0.78 | 0.79 | 0.78 | |-----------------------+------+------+------+------+------| | Average remark size | 4.30 | 4.84 | 4.75 | 4.11 | 4.37 | | Mem2disk ratio | 3.91 | 3.51 | 3.17 | 3.66 | 2.71 | Differential Revision: https://reviews.llvm.org/D35534 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308536 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/opt-viewer/optrecord.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tools/opt-viewer/optrecord.py b/tools/opt-viewer/optrecord.py index 4599e12d7e6..adc2016017c 100644 --- a/tools/opt-viewer/optrecord.py +++ b/tools/opt-viewer/optrecord.py @@ -60,11 +60,34 @@ class Remark(yaml.YAMLObject): # Work-around for http://pyyaml.org/ticket/154. 
yaml_loader = Loader - def initmissing(self): + def _intern_strings(self): + self.Pass = intern(self.Pass) + self.Name = intern(self.Name) + self.Function = intern(self.Function) + + # Intern key and value if string and recurse if value is a dictionary. + # This handles [{'Caller': ..., 'DebugLoc': { 'File': ... }}] + def _intern_dict(old_dict): + new_dict = dict() + for (k, v) in old_dict.iteritems(): + if type(k) is str: + k = intern(k) + + if type(v) is str: + v = intern(v) + elif type(v) is dict: + v = _intern_dict(v) + new_dict[k] = v + return new_dict + + self.Args = [_intern_dict(arg_dict) for arg_dict in self.Args] + + def canonicalize(self): if not hasattr(self, 'Hotness'): self.Hotness = 0 if not hasattr(self, 'Args'): self.Args = [] + self._intern_strings() @property def File(self): @@ -193,7 +216,7 @@ def get_remarks(input_file): with open(input_file) as f: docs = yaml.load_all(f, Loader=Loader) for remark in docs: - remark.initmissing() + remark.canonicalize() # Avoid remarks withoug debug location or if they are duplicated if not hasattr(remark, 'DebugLoc') or remark.key in all_remarks: continue -- 2.50.1